// /lntest/node/harness_node.go

package node

import (
        "bytes"
        "context"
        "crypto/rand"
        "encoding/hex"
        "encoding/json"
        "fmt"
        "io"
        "os"
        "os/exec"
        "path/filepath"
        "strings"
        "testing"
        "time"

        "github.com/jackc/pgx/v4/pgxpool"
        "github.com/lightningnetwork/lnd"
        "github.com/lightningnetwork/lnd/lnrpc"
        "github.com/lightningnetwork/lnd/lntest/rpc"
        "github.com/lightningnetwork/lnd/lntest/wait"
        "github.com/lightningnetwork/lnd/macaroons"
        "google.golang.org/grpc"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/credentials"
        "google.golang.org/grpc/status"
        "gopkg.in/macaroon.v2"
)

const (
        // logPubKeyBytes is the number of bytes of the node's PubKey that will
        // be appended to the log file name. The whole PubKey is too long and
        // not really necessary to quickly identify what node produced which
        // log file.
        logPubKeyBytes = 4

        // trickleDelay is the amount of time in milliseconds between each
        // release of announcements by AuthenticatedGossiper to the network.
        trickleDelay = 50

        // postgresDsn is the DSN template used to connect to the local
        // postgres instance; the database name is filled in via
        // postgresDatabaseDsn below.
        postgresDsn = "postgres://postgres:postgres@localhost:" +
                "6432/%s?sslmode=disable"

        // commitInterval specifies the maximum interval the graph database
        // will wait between attempting to flush a batch of modifications to
        // disk (db.batch-commit-interval).
        commitInterval = 10 * time.Millisecond
)

// HarnessNode represents an instance of lnd running within our test network
// harness. It's responsible for managing the lnd process, grpc connection, and
// wallet auth. A HarnessNode is built upon its rpc clients, represented in
// `HarnessRPC`. It also has a `State` which holds its internal state, and a
// `Watcher` that keeps track of its topology updates.
type HarnessNode struct {
        *testing.T

        // Cfg holds the config values for the node.
        Cfg *BaseNodeConfig

        // RPC holds a list of RPC clients.
        RPC *rpc.HarnessRPC

        // State records the current state of the node.
        State *State

        // Watcher watches the node's topology updates.
        Watcher *nodeWatcher

        // PubKey is the serialized compressed identity public key of the node.
        // This field will only be populated once the node itself has been
        // started via the start() method.
        PubKey    [33]byte
        PubKeyStr string

        // conn is the underlying connection to the grpc endpoint of the node.
        conn *grpc.ClientConn

        // runCtx is a context with cancel method. It's used to signal when the
        // node needs to quit, and used as the parent context when spawning
        // child contexts for RPC requests.
        runCtx context.Context //nolint:containedctx
        cancel context.CancelFunc

        // filename is the log file's name.
        filename string

        // cmd is the running lnd process.
        cmd *exec.Cmd

        // logFile is the file the node's output is redirected to when the
        // logoutput flag is set.
        logFile *os.File
}
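
// Since *testing.T is embedded, its method set is promoted onto HarnessNode,
// so a node can call testing helpers directly. A small illustration
// (hypothetical, not part of the original file):
//
//	hn.Logf("harness node %s is ready", hn.Name())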

// NewHarnessNode creates a new test lightning node instance from the passed
// config.
func NewHarnessNode(t *testing.T, cfg *BaseNodeConfig) (*HarnessNode, error) {
        if cfg.BaseDir == "" {
                var err error

                // Create a temporary directory for the node's data and logs.
                // Use dash suffix as a separator between base name and random
                // suffix.
                dirBaseName := fmt.Sprintf("lndtest-node-%s-", cfg.Name)
                cfg.BaseDir, err = os.MkdirTemp("", dirBaseName)
                if err != nil {
                        return nil, err
                }
        }
        cfg.DataDir = filepath.Join(cfg.BaseDir, "data")
        cfg.LogDir = filepath.Join(cfg.BaseDir, "logs")
        cfg.TLSCertPath = filepath.Join(cfg.BaseDir, "tls.cert")
        cfg.TLSKeyPath = filepath.Join(cfg.BaseDir, "tls.key")

        networkDir := filepath.Join(
                cfg.DataDir, "chain", lnd.BitcoinChainName, cfg.NetParams.Name,
        )
        cfg.AdminMacPath = filepath.Join(networkDir, "admin.macaroon")
        cfg.ReadMacPath = filepath.Join(networkDir, "readonly.macaroon")
        cfg.InvoiceMacPath = filepath.Join(networkDir, "invoice.macaroon")

        cfg.GenerateListeningPorts()

        // Create temporary database.
        var dbName string
        if cfg.DBBackend == BackendPostgres {
                var err error
                dbName, err = createTempPgDB()
                if err != nil {
                        return nil, err
                }
                cfg.PostgresDsn = postgresDatabaseDsn(dbName)
        }

        cfg.OriginalExtraArgs = cfg.ExtraArgs
        cfg.postgresDBName = dbName

        return &HarnessNode{
                T:   t,
                Cfg: cfg,
        }, nil
}
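
// A minimal usage sketch for the constructor above (hypothetical test code,
// not part of the original file), assuming a populated BaseNodeConfig:
//
//	node, err := NewHarnessNode(t, cfg)
//	if err != nil {
//		t.Fatalf("create node: %v", err)
//	}
//	if err := node.Start(context.Background()); err != nil {
//		t.Fatalf("start node: %v", err)
//	}
//	defer func() {
//		if err := node.Shutdown(); err != nil {
//			t.Fatalf("shutdown node: %v", err)
//		}
//	}()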

// Initialize creates a list of new RPC clients using the passed connection,
// initializes the node's internal state and creates a topology watcher.
func (hn *HarnessNode) Initialize(c *grpc.ClientConn) {
        hn.conn = c

        // Init all the rpc clients.
        hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, c, hn.Name())

        // Init the node's state.
        //
        // If we already have a state, it means we are restarting the node and
        // we will only reset its internal states. Otherwise we'll create a new
        // state.
        if hn.State != nil {
                hn.State.resetEphermalStates(hn.RPC)
        } else {
                hn.State = newState(hn.RPC)
        }

        // Init the topology watcher.
        hn.Watcher = newNodeWatcher(hn.RPC, hn.State)
}

// Name returns the name of this node set during initialization.
func (hn *HarnessNode) Name() string {
        return hn.Cfg.Name
}

// UpdateState updates the node's internal state.
func (hn *HarnessNode) UpdateState() {
        hn.State.updateState()
}

// String gives the internal state of the node which is useful for debugging.
func (hn *HarnessNode) String() string {
        type nodeCfg struct {
                LogFilenamePrefix string
                ExtraArgs         []string
                SkipUnlock        bool
                Password          []byte
                P2PPort           int
                RPCPort           int
                RESTPort          int
                AcceptKeySend     bool
                FeeURL            string
        }

        nodeState := struct {
                NodeID  uint32
                Name    string
                PubKey  string
                State   *State
                NodeCfg nodeCfg
        }{
                NodeID: hn.Cfg.NodeID,
                Name:   hn.Cfg.Name,
                PubKey: hn.PubKeyStr,
                State:  hn.State,
                NodeCfg: nodeCfg{
                        SkipUnlock:        hn.Cfg.SkipUnlock,
                        Password:          hn.Cfg.Password,
                        LogFilenamePrefix: hn.Cfg.LogFilenamePrefix,
                        ExtraArgs:         hn.Cfg.ExtraArgs,
                        P2PPort:           hn.Cfg.P2PPort,
                        RPCPort:           hn.Cfg.RPCPort,
                        RESTPort:          hn.Cfg.RESTPort,
                },
        }

        stateBytes, err := json.MarshalIndent(nodeState, "", "\t")
        if err != nil {
                return fmt.Sprintf("\n encode node state with err: %v", err)
        }

        return fmt.Sprintf("\nnode state: %s", stateBytes)
}

// WaitUntilStarted waits until the wallet state flips from "WAITING_TO_START".
func (hn *HarnessNode) WaitUntilStarted() error {
        return hn.waitTillServerState(func(s lnrpc.WalletState) bool {
                return s != lnrpc.WalletState_WAITING_TO_START
        })
}

// WaitUntilServerActive waits until the lnd daemon is fully started.
func (hn *HarnessNode) WaitUntilServerActive() error {
        return hn.waitTillServerState(func(s lnrpc.WalletState) bool {
                return s == lnrpc.WalletState_SERVER_ACTIVE
        })
}

// WaitUntilLeader attempts to finish the start procedure by initiating an RPC
// connection and setting up the wallet unlocker client. This is needed when
// a node that has recently been started was waiting to become the leader and
// we're at the point when we expect that it is the leader now (awaiting
// unlock).
func (hn *HarnessNode) WaitUntilLeader(timeout time.Duration) error {
        var (
                conn    *grpc.ClientConn
                connErr error
        )

        if err := wait.NoError(func() error {
                conn, connErr = hn.ConnectRPCWithMacaroon(nil)
                return connErr
        }, timeout); err != nil {
                return err
        }

        // Since the conn is not authed, only the `WalletUnlocker` and `State`
        // clients can be inited from this conn.
        hn.conn = conn
        hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, conn, hn.Name())

        // Wait till the server is starting.
        return hn.WaitUntilStarted()
}

// Unlock attempts to unlock the wallet of the target HarnessNode. This method
// should be called after the restart of a HarnessNode that was created with a
// seed+password. Once this method returns, the HarnessNode will be ready to
// accept normal gRPC requests and harness commands.
func (hn *HarnessNode) Unlock(unlockReq *lnrpc.UnlockWalletRequest) error {
        // We'll need to unlock the node before it's able to start up
        // properly.
        hn.RPC.UnlockWallet(unlockReq)

        // Now that the wallet has been unlocked, we'll wait for the RPC client
        // to be ready, then establish the normal gRPC connection.
        return hn.InitNode(nil)
}

// AddToLogf adds a line of choice to the node's logfile. This is useful
// to interleave test output with output from the node.
func (hn *HarnessNode) AddToLogf(format string, a ...interface{}) {
        // If this node was not set up with a log file, just return early.
        if hn.logFile == nil {
                return
        }

        desc := fmt.Sprintf("itest: %s\n", fmt.Sprintf(format, a...))
        if _, err := hn.logFile.WriteString(desc); err != nil {
                hn.printErrf("write to log err: %v", err)
        }
}

// ReadMacaroon waits a given duration for the macaroon file to be created. If
// the file is readable within the timeout, its content is de-serialized as a
// macaroon and returned.
func (hn *HarnessNode) ReadMacaroon(macPath string, timeout time.Duration) (
        *macaroon.Macaroon, error) {

        // Wait until macaroon file is created and has valid content before
        // using it.
        var mac *macaroon.Macaroon
        err := wait.NoError(func() error {
                macBytes, err := os.ReadFile(macPath)
                if err != nil {
                        return fmt.Errorf("error reading macaroon file: %w",
                                err)
                }

                newMac := &macaroon.Macaroon{}
                if err = newMac.UnmarshalBinary(macBytes); err != nil {
                        return fmt.Errorf("error unmarshalling macaroon "+
                                "file: %w", err)
                }
                mac = newMac

                return nil
        }, timeout)

        return mac, err
}
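
// ReadMacaroon pairs naturally with ConnectRPCWithMacaroon below; e.g., a
// connection restricted to read-only permissions could be opened like this
// (a sketch, not part of the original file):
//
//	mac, err := hn.ReadMacaroon(hn.Cfg.ReadMacPath, wait.DefaultTimeout)
//	if err != nil {
//		return err
//	}
//	conn, err := hn.ConnectRPCWithMacaroon(mac)
//	if err != nil {
//		return err
//	}
//	defer conn.Close()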

// ConnectRPCWithMacaroon uses the TLS certificate and given macaroon to
// create a gRPC client connection.
func (hn *HarnessNode) ConnectRPCWithMacaroon(mac *macaroon.Macaroon) (
        *grpc.ClientConn, error) {

        // Wait until TLS certificate is created and has valid content before
        // using it, up to 30 sec.
        var tlsCreds credentials.TransportCredentials
        err := wait.NoError(func() error {
                var err error
                tlsCreds, err = credentials.NewClientTLSFromFile(
                        hn.Cfg.TLSCertPath, "",
                )
                return err
        }, wait.DefaultTimeout)
        if err != nil {
                return nil, fmt.Errorf("error reading TLS cert: %w", err)
        }

        opts := []grpc.DialOption{
                grpc.WithBlock(),
                grpc.WithTransportCredentials(tlsCreds),
        }

        // With grpc.WithBlock set above, DialContext blocks until the
        // connection is up, so the timeout on this context bounds the whole
        // connection attempt.
        ctx, cancel := context.WithTimeout(hn.runCtx, wait.DefaultTimeout)
        defer cancel()

        if mac == nil {
                return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...)
        }
        macCred, err := macaroons.NewMacaroonCredential(mac)
        if err != nil {
                return nil, fmt.Errorf("error cloning mac: %w", err)
        }
        opts = append(opts, grpc.WithPerRPCCredentials(macCred))

        return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...)
}

// ConnectRPC uses the TLS certificate and admin macaroon files written by the
// lnd node to create a gRPC client connection.
func (hn *HarnessNode) ConnectRPC() (*grpc.ClientConn, error) {
        // If we should use a macaroon, always take the admin macaroon as a
        // default.
        mac, err := hn.ReadMacaroon(hn.Cfg.AdminMacPath, wait.DefaultTimeout)
        if err != nil {
                return nil, err
        }

        return hn.ConnectRPCWithMacaroon(mac)
}

// SetExtraArgs assigns the ExtraArgs field for the node's configuration. The
// changes will take effect on restart.
func (hn *HarnessNode) SetExtraArgs(extraArgs []string) {
        hn.Cfg.ExtraArgs = extraArgs
}

// StartLndCmd handles the startup of lnd, creating the run context and, if
// requested, the log files.
func (hn *HarnessNode) StartLndCmd(ctxb context.Context) error {
        // Init the run context.
        hn.runCtx, hn.cancel = context.WithCancel(ctxb)

        args := hn.Cfg.GenArgs()
        hn.cmd = exec.Command(hn.Cfg.LndBinary, args...)

        // Redirect stderr output to a buffer.
        var errb bytes.Buffer
        hn.cmd.Stderr = &errb

        // If the logoutput flag is passed, redirect output from the nodes to
        // log files.
        if *logOutput {
                err := addLogFile(hn)
                if err != nil {
                        return err
                }
        }

        // Start the process.
        if err := hn.cmd.Start(); err != nil {
                return err
        }

        pid := hn.cmd.Process.Pid
        hn.T.Logf("Starting node (name=%v) with PID=%v", hn.Cfg.Name, pid)

        return nil
}

// StartWithNoAuth starts the lnd process, creates the grpc connection without
// macaroon auth, and waits until the server is reported as waiting to start.
//
// NOTE: the caller needs to take the extra steps to create and unlock the
// wallet.
func (hn *HarnessNode) StartWithNoAuth(ctxt context.Context) error {
        // Start lnd process and prepare logs.
        if err := hn.StartLndCmd(ctxt); err != nil {
                return fmt.Errorf("start lnd error: %w", err)
        }

        // Create an unauthed connection.
        conn, err := hn.ConnectRPCWithMacaroon(nil)
        if err != nil {
                return fmt.Errorf("ConnectRPCWithMacaroon err: %w", err)
        }

        // Since the conn is not authed, only the `WalletUnlocker` and `State`
        // clients can be inited from this conn.
        hn.conn = conn
        hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, conn, hn.Name())

        // Wait till the server is starting.
        return hn.WaitUntilStarted()
}
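
// A sketch of the full no-auth startup flow (hypothetical; the wallet
// creation wrapper, named InitWallet here with an assumed initReq request, is
// an assumption and not defined in this file):
//
//	if err := hn.StartWithNoAuth(ctx); err != nil {
//		return err
//	}
//	// Create the wallet over the unauthed WalletUnlocker client, then
//	// finish initialization with the returned admin macaroon.
//	resp := hn.RPC.InitWallet(initReq)
//	if err := hn.InitNode(resp.AdminMacaroon); err != nil {
//		return err
//	}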

// Start starts the lnd process, creates the grpc connection, and waits until
// the server is fully started.
func (hn *HarnessNode) Start(ctxt context.Context) error {
        // Start lnd process and prepare logs.
        if err := hn.StartLndCmd(ctxt); err != nil {
                return fmt.Errorf("start lnd error: %w", err)
        }

        // Since Stop uses the LightningClient to stop the node, if we fail to
        // get a connected client, we have to kill the process.
        conn, err := hn.ConnectRPC()
        if err != nil {
                err = fmt.Errorf("ConnectRPC err: %w", err)
                cmdErr := hn.Kill()
                if cmdErr != nil {
                        err = fmt.Errorf("kill process got err: %w: %v",
                                cmdErr, err)
                }
                return err
        }

        // Init the node by creating the RPC clients, initializing node's
        // internal state and watcher.
        hn.Initialize(conn)

        // Wait till the server is starting.
        if err := hn.WaitUntilStarted(); err != nil {
                return fmt.Errorf("waiting for start got: %w", err)
        }

        // Subscribe for topology updates.
        return hn.initLightningClient()
}

// InitNode waits until the main gRPC server is detected as active, then
// completes the normal HarnessNode gRPC connection creation. A non-nil
// `macBytes` indicates the node is initialized stateless, otherwise it will
// use the admin macaroon.
func (hn *HarnessNode) InitNode(macBytes []byte) error {
        var (
                conn *grpc.ClientConn
                err  error
        )

        // If the node has been initialized stateless, we need to pass the
        // macaroon to the client.
        if macBytes != nil {
                adminMac := &macaroon.Macaroon{}
                err := adminMac.UnmarshalBinary(macBytes)
                if err != nil {
                        return fmt.Errorf("unmarshal failed: %w", err)
                }
                conn, err = hn.ConnectRPCWithMacaroon(adminMac)
                if err != nil {
                        return err
                }
        } else {
                // Normal initialization, we expect a macaroon to be in the
                // file system.
                conn, err = hn.ConnectRPC()
                if err != nil {
                        return err
                }
        }

        // Init the node by creating the RPC clients, initializing node's
        // internal state and watcher.
        hn.Initialize(conn)

        // Wait till the server is starting.
        if err := hn.WaitUntilStarted(); err != nil {
                return fmt.Errorf("waiting for start got: %w", err)
        }

        return hn.initLightningClient()
}

// ChangePasswordAndInit initializes a harness node by passing the change
// password request via RPC. After the request is submitted, this method will
// block until a macaroon-authenticated RPC connection can be established to
// the harness node. Once established, the new connection is used to
// initialize the RPC clients and subscribes the HarnessNode to topology
// changes.
func (hn *HarnessNode) ChangePasswordAndInit(
        req *lnrpc.ChangePasswordRequest) (
        *lnrpc.ChangePasswordResponse, error) {

        response := hn.RPC.ChangePassword(req)
        return response, hn.InitNode(response.AdminMacaroon)
}

// waitTillServerState makes a subscription to the server's state change and
// blocks until the server is in the targeted state.
func (hn *HarnessNode) waitTillServerState(
        predicate func(state lnrpc.WalletState) bool) error {

        client := hn.RPC.SubscribeState()

        errChan := make(chan error, 1)
        done := make(chan struct{})
        go func() {
                for {
                        resp, err := client.Recv()
                        if err != nil {
                                errChan <- err
                                return
                        }

                        if predicate(resp.State) {
                                close(done)
                                return
                        }
                }
        }()

        for {
                select {
                case <-time.After(wait.NodeStartTimeout):
                        return fmt.Errorf("timeout waiting for server state")
                case err := <-errChan:
                        return fmt.Errorf("receive server state err: %w", err)

                case <-done:
                        return nil
                }
        }
}
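
// The predicate argument makes waitTillServerState reusable for any target
// state; WaitUntilStarted and WaitUntilServerActive above are thin wrappers
// around it. Waiting for the unlock prompt, for instance, could look like
// this (hypothetical):
//
//	err := hn.waitTillServerState(func(s lnrpc.WalletState) bool {
//		return s == lnrpc.WalletState_LOCKED
//	})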

// initLightningClient blocks until the lnd server is fully started and
// subscribes the harness node to graph topology updates. This method also
// spawns a lightning network watcher for this node, which watches for topology
// changes.
func (hn *HarnessNode) initLightningClient() error {
        // Wait until the server is fully started.
        if err := hn.WaitUntilServerActive(); err != nil {
                return fmt.Errorf("waiting for server active: %w", err)
        }

        // Set the harness node's pubkey to what the node claims in GetInfo.
        // The RPC must have been started at this point.
        if err := hn.attachPubKey(); err != nil {
                return err
        }

        // Launch the watcher that will hook into graph related topology change
        // from the PoV of this node.
        started := make(chan error, 1)
        go hn.Watcher.topologyWatcher(hn.runCtx, started)

        select {
        // First time reading the channel indicates the topology client is
        // started.
        case err := <-started:
                if err != nil {
                        return fmt.Errorf("create topology client stream "+
                                "got err: %v", err)
                }

        case <-time.After(wait.DefaultTimeout):
                return fmt.Errorf("timeout creating topology client stream")
        }

        // Catch topology client stream error inside a goroutine.
        go func() {
                select {
                case err := <-started:
                        hn.printErrf("topology client: %v", err)

                case <-hn.runCtx.Done():
                }
        }()

        return nil
}

// attachPubKey queries an unlocked node to retrieve its public key.
func (hn *HarnessNode) attachPubKey() error {
        // Obtain the identity pubkey of this node for quick identification
        // purposes.
        info := hn.RPC.GetInfo()
        hn.PubKeyStr = info.IdentityPubkey

        pubkey, err := hex.DecodeString(info.IdentityPubkey)
        if err != nil {
                return err
        }
        copy(hn.PubKey[:], pubkey)

        return nil
}

// cleanup cleans up all the temporary files created by the node's process.
func (hn *HarnessNode) cleanup() error {
        if hn.Cfg.backupDBDir != "" {
                err := os.RemoveAll(hn.Cfg.backupDBDir)
                if err != nil {
                        return fmt.Errorf("unable to remove backup dir: %w",
                                err)
                }
        }

        return os.RemoveAll(hn.Cfg.BaseDir)
}

// WaitForProcessExit waits for the lnd process to exit, bubbling up any
// potential fatal process errors to the goroutine running the tests.
func (hn *HarnessNode) WaitForProcessExit() error {
        var errReturned error

        errChan := make(chan error, 1)
        go func() {
                errChan <- hn.cmd.Wait()
        }()

        select {
        case err := <-errChan:
                if err == nil {
                        break
                }

                // If the process has already been canceled, we can exit early
                // as the logs have already been saved.
                if strings.Contains(err.Error(), "Wait was already called") {
                        return nil
                }

                // The process may have already been killed in the test, in
                // that case we will skip the error and continue processing
                // the logs.
                if strings.Contains(err.Error(), "signal: killed") {
                        break
                }

                // Otherwise, we print the error, break the select and save
                // logs.
                hn.printErrf("wait process exit got err: %v", err)
                errReturned = err

        case <-time.After(wait.DefaultTimeout):
                hn.printErrf("timeout waiting for process to exit")
        }

        // Make sure log file is closed and renamed if necessary.
        filename := finalizeLogfile(hn)

        // Assert the node has shut down from the log file.
        err1 := assertNodeShutdown(filename)
        if err1 != nil {
                return fmt.Errorf("[%s]: assert shutdown failed in log[%s]: %w",
                        hn.Name(), filename, err1)
        }

        // Rename the etcd.log file if the node was running on embedded etcd.
        finalizeEtcdLog(hn)

        return errReturned
}

// Stop attempts to stop the active lnd process.
func (hn *HarnessNode) Stop() error {
        // Do nothing if the process is not running.
        if hn.runCtx == nil {
                hn.printErrf("found nil run context")
                return nil
        }

        // Stop the runCtx.
        hn.cancel()

        // If we ever reach the state where `Watcher` is initialized, it
        // means the node has an authed connection and all its RPC clients are
        // ready for use. Thus we will try to stop it via the RPC.
        if hn.Watcher != nil {
                // Don't watch for error because sometimes the RPC connection
                // gets closed before a response is returned.
                req := lnrpc.StopRequest{}

                ctxt, cancel := context.WithCancel(context.Background())
                defer cancel()

                err := wait.NoError(func() error {
                        _, err := hn.RPC.LN.StopDaemon(ctxt, &req)
                        if err == nil {
                                return nil
                        }

                        // If the connection is already closed, we can exit
                        // early as the node has already been shut down in the
                        // test, e.g., in etcd leader health check test.
                        if strings.Contains(err.Error(), "connection refused") {
                                return nil
                        }

                        return err
                }, wait.DefaultTimeout)
                if err != nil {
                        return fmt.Errorf("shutdown timeout: %w", err)
                }

                // Wait for goroutines to be finished.
                done := make(chan struct{})
                go func() {
                        hn.Watcher.wg.Wait()
                        close(done)
                        hn.Watcher = nil
                }()

                // If the goroutines fail to finish before timeout, we'll print
                // the error to console and continue.
                select {
                case <-time.After(wait.DefaultTimeout):
                        hn.printErrf("timeout on wait group")
                case <-done:
                }
        } else {
                // If the rpc clients are not initiated, we'd kill the process
                // manually.
                hn.printErrf("found nil RPC clients")
                if err := hn.Kill(); err != nil {
                        // Skip the error if the process is already dead.
                        if !strings.Contains(
                                err.Error(), "process already finished",
                        ) {

                                return fmt.Errorf("killing process got: %w",
                                        err)
                        }
                }
        }

        // Close any attempts at further grpc connections.
        if hn.conn != nil {
                if err := hn.CloseConn(); err != nil {
                        return err
                }
        }

        // Wait for lnd process to exit in the end.
        return hn.WaitForProcessExit()
}

// CloseConn closes the grpc connection.
func (hn *HarnessNode) CloseConn() error {
        err := status.Code(hn.conn.Close())
        switch err {
        case codes.OK:
                return nil

        // When the context is canceled above, we might get the
        // following error as the context is no longer active.
        case codes.Canceled:
                return nil

        case codes.Unknown:
                return fmt.Errorf("unknown error attempting to stop "+
                        "grpc client: %v", err)

        default:
                return fmt.Errorf("error attempting to stop "+
                        "grpc client: %v", err)
        }
}

// Shutdown stops the active lnd process and cleans up any temporary
// directories created along the way.
func (hn *HarnessNode) Shutdown() error {
        if err := hn.Stop(); err != nil {
                return err
        }
        if err := hn.cleanup(); err != nil {
                return err
        }
        return nil
}

// Kill kills the lnd process.
func (hn *HarnessNode) Kill() error {
        return hn.cmd.Process.Kill()
}

// KillAndWait kills the lnd process and waits for it to finish.
func (hn *HarnessNode) KillAndWait() error {
        err := hn.cmd.Process.Kill()
        if err != nil {
                return err
        }

        _, err = hn.cmd.Process.Wait()

        return err
}

// printErrf prints an error to the console.
func (hn *HarnessNode) printErrf(format string, a ...interface{}) {
        fmt.Printf("itest error from [%s:%s]: %s\n", //nolint:forbidigo
                hn.Cfg.LogFilenamePrefix, hn.Cfg.Name,
                fmt.Sprintf(format, a...))
}

// BackupDB creates a backup of the current database.
func (hn *HarnessNode) BackupDB() error {
        if hn.Cfg.backupDBDir != "" {
                return fmt.Errorf("backup already created")
        }

        if hn.Cfg.postgresDBName != "" {
                // Backup database.
                backupDBName := hn.Cfg.postgresDBName + "_backup"
                err := executePgQuery(
                        "CREATE DATABASE " + backupDBName + " WITH TEMPLATE " +
                                hn.Cfg.postgresDBName,
                )
                if err != nil {
                        return err
                }
        } else {
                // Backup files.
                tempDir, err := os.MkdirTemp("", "past-state")
                if err != nil {
                        return fmt.Errorf("unable to create temp db folder: %w",
                                err)
                }

                if err := copyAll(tempDir, hn.Cfg.DBDir()); err != nil {
                        return fmt.Errorf("unable to copy database files: %w",
                                err)
                }

                hn.Cfg.backupDBDir = tempDir
        }

        return nil
}

// RestoreDB restores a database backup.
func (hn *HarnessNode) RestoreDB() error {
        if hn.Cfg.postgresDBName != "" {
                // Restore database.
                backupDBName := hn.Cfg.postgresDBName + "_backup"
                err := executePgQuery(
                        "DROP DATABASE " + hn.Cfg.postgresDBName,
                )
                if err != nil {
                        return err
                }
                err = executePgQuery(
                        "ALTER DATABASE " + backupDBName + " RENAME TO " +
                                hn.Cfg.postgresDBName,
                )
                if err != nil {
                        return err
                }
        } else {
                // Restore files.
                if hn.Cfg.backupDBDir == "" {
                        return fmt.Errorf("no database backup created")
                }

                err := copyAll(hn.Cfg.DBDir(), hn.Cfg.backupDBDir)
                if err != nil {
                        return fmt.Errorf("unable to copy database files: %w",
                                err)
                }

                if err := os.RemoveAll(hn.Cfg.backupDBDir); err != nil {
                        return fmt.Errorf("unable to remove backup dir: %w",
                                err)
                }
                hn.Cfg.backupDBDir = ""
        }

        return nil
}
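
// BackupDB and RestoreDB above are designed to be used as a pair, snapshotting
// a node's database before an operation and rolling it back afterwards. A
// sketch (hypothetical test code; the node is typically restarted around the
// restore so the database files are not in use):
//
//	if err := hn.BackupDB(); err != nil {
//		return err
//	}
//	// ... mutate the node's state ...
//	if err := hn.RestoreDB(); err != nil {
//		return err
//	}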

// UpdateGlobalPolicy updates a node's global channel policy.
func (hn *HarnessNode) UpdateGlobalPolicy(policy *lnrpc.RoutingPolicy) {
        updateFeeReq := &lnrpc.PolicyUpdateRequest{
                BaseFeeMsat: policy.FeeBaseMsat,
                FeeRate: float64(policy.FeeRateMilliMsat) /
                        float64(1_000_000),
                TimeLockDelta: policy.TimeLockDelta,
                Scope:         &lnrpc.PolicyUpdateRequest_Global{Global: true},
                MaxHtlcMsat:   policy.MaxHtlcMsat,
        }
        hn.RPC.UpdateChannelPolicy(updateFeeReq)
}
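
// Note the unit conversion above: FeeRateMilliMsat expresses the proportional
// fee in parts per million, so dividing by 1,000,000 yields the fractional
// rate expected by PolicyUpdateRequest.FeeRate. For example, a
// FeeRateMilliMsat of 1000 maps to a FeeRate of 0.001, i.e. 0.1%.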

// postgresDatabaseDsn returns the connection string for the given database
// name, filling in the postgresDsn template defined above.
func postgresDatabaseDsn(dbName string) string {
        return fmt.Sprintf(postgresDsn, dbName)
}
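
// For example, postgresDatabaseDsn("itest_0123abcd") expands the template to
// "postgres://postgres:postgres@localhost:6432/itest_0123abcd?sslmode=disable".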

// createTempPgDB creates a temp postgres database.
func createTempPgDB() (string, error) {
        // Create random database name.
        randBytes := make([]byte, 8)
        _, err := rand.Read(randBytes)
        if err != nil {
                return "", err
        }
        dbName := "itest_" + hex.EncodeToString(randBytes)

        // Create database.
        err = executePgQuery("CREATE DATABASE " + dbName)
        if err != nil {
                return "", err
        }

        return dbName, nil
}

// executePgQuery executes a SQL statement in a postgres db.
func executePgQuery(query string) error {
        pool, err := pgxpool.Connect(
                context.Background(),
                postgresDatabaseDsn("postgres"),
        )
        if err != nil {
                return fmt.Errorf("unable to connect to database: %w", err)
        }
        defer pool.Close()

        _, err = pool.Exec(context.Background(), query)
        return err
}

// renameFile is a helper to rename (log) files created during integration
// tests.
func renameFile(fromFileName, toFileName string) {
        err := os.Rename(fromFileName, toFileName)
        if err != nil {
                fmt.Printf("could not rename %s to %s: %v\n", // nolint:forbidigo
                        fromFileName, toFileName, err)
        }
}

// getFinalizedLogFilePrefix returns the finalized log file prefix.
func getFinalizedLogFilePrefix(hn *HarnessNode) string {
        pubKeyHex := hex.EncodeToString(
                hn.PubKey[:logPubKeyBytes],
        )

        return fmt.Sprintf("%s/%d-%s-%s-%s", GetLogDir(), hn.Cfg.NodeID,
                hn.Cfg.LogFilenamePrefix, hn.Cfg.Name, pubKeyHex)
}

// finalizeLogfile closes the node's log file and, if log output is enabled,
// renames it to its final name, returning the new file name.
func finalizeLogfile(hn *HarnessNode) string {
        // Exit early if there's no log file.
        if hn.logFile == nil {
                return ""
        }

        hn.logFile.Close()

        // If logoutput flag is not set, return early.
        if !*logOutput {
                return ""
        }

        newFileName := fmt.Sprintf("%v.log", getFinalizedLogFilePrefix(hn))
        renameFile(hn.filename, newFileName)

        return newFileName
}

// assertNodeShutdown asserts that the node has shut down properly by checking
// the last lines of the log file for the shutdown message "Shutdown complete".
func assertNodeShutdown(filename string) error {
        file, err := os.Open(filename)
        if err != nil {
                return err
        }
        defer file.Close()

        // Read more than one line to make sure we get the last line.
        // const linesSize = 200
        //
        // NOTE: Reading 200 bytes of lines should be more than enough to find
        // the `Shutdown complete` message. However, this is only true if the
        // message is printed last, which means `lnd` will properly wait for
        // all its subsystems to shut down before exiting. Unfortunately there
        // is at least one bug in the shutdown process where we don't wait for
        // the chain backend to fully quit first, which can be easily
        // reproduced by turning on `RPCC=trace` and using a linesSize of 200.
        //
        // TODO(yy): fix the shutdown process and remove this workaround by
        // refactoring the lnd to use only one rpcclient, which requires quite
        // some work on the btcwallet front.
        const linesSize = 1000

        buf := make([]byte, linesSize)
        stat, statErr := file.Stat()
        if statErr != nil {
                return statErr
        }

        start := stat.Size() - linesSize
        _, err = file.ReadAt(buf, start)
        if err != nil {
                return err
        }

        // Exit early if the shutdown line is found.
        if bytes.Contains(buf, []byte("Shutdown complete")) {
                return nil
        }

        // For etcd tests, we need to check for the line where the node is
        // blocked at wallet unlock since we are testing how such a behavior is
        // handled by etcd.
        if bytes.Contains(buf, []byte("wallet and unlock")) {
                return nil
        }

        return fmt.Errorf("node did not shut down properly: found log "+
                "lines: %s", buf)
}

// finalizeEtcdLog saves the etcd log file when the test ends.
func finalizeEtcdLog(hn *HarnessNode) {
        // Exit early if this is not an etcd backend.
        if hn.Cfg.DBBackend != BackendEtcd {
                return
        }

        etcdLogFileName := fmt.Sprintf("%s/etcd.log", hn.Cfg.LogDir)
        newEtcdLogFileName := fmt.Sprintf("%v-etcd.log",
                getFinalizedLogFilePrefix(hn),
        )

        renameFile(etcdLogFileName, newEtcdLogFileName)
}

// addLogFile creates log files used by this node.
func addLogFile(hn *HarnessNode) error {
        var fileName string

        dir := GetLogDir()
        fileName = fmt.Sprintf("%s/%d-%s-%s-%s.log", dir, hn.Cfg.NodeID,
                hn.Cfg.LogFilenamePrefix, hn.Cfg.Name,
                hex.EncodeToString(hn.PubKey[:logPubKeyBytes]))

        // If the node's PubKey is not yet initialized, create a temporary file
        // name. Later, after the PubKey has been initialized, the file can be
        // moved to its final name with the PubKey included.
        if bytes.Equal(hn.PubKey[:4], []byte{0, 0, 0, 0}) {
                fileName = fmt.Sprintf("%s/%d-%s-%s-tmp__.log", dir,
                        hn.Cfg.NodeID, hn.Cfg.LogFilenamePrefix,
                        hn.Cfg.Name)
        }

        // Create file if not exists, otherwise append.
        file, err := os.OpenFile(fileName,
                os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
        if err != nil {
                return err
        }

        // Pass node's stderr to both errb and the file.
        w := io.MultiWriter(hn.cmd.Stderr, file)
        hn.cmd.Stderr = w

        // Pass the node's stdout only to the file.
        hn.cmd.Stdout = file

        // Let the node keep a reference to this file, such that we can add to
        // it if necessary.
        hn.logFile = file

        hn.filename = fileName

        return nil
}

// copyAll copies all files and directories from srcDir to dstDir recursively.
// Note that this function does not support links.
func copyAll(dstDir, srcDir string) error {
        entries, err := os.ReadDir(srcDir)
        if err != nil {
                return err
        }

        for _, entry := range entries {
                srcPath := filepath.Join(srcDir, entry.Name())
                dstPath := filepath.Join(dstDir, entry.Name())

                info, err := os.Stat(srcPath)
                if err != nil {
                        return err
                }

                if info.IsDir() {
                        err := os.Mkdir(dstPath, info.Mode())
                        if err != nil && !os.IsExist(err) {
                                return err
                        }

                        err = copyAll(dstPath, srcPath)
                        if err != nil {
                                return err
                        }
                } else if err := CopyFile(dstPath, srcPath); err != nil {
                        return err
                }
        }

        return nil
}