问题

CDK 跨链桥启动后,发起一笔跨链交易,随后出现 networkID 数据错乱;重启服务后报如下错误:

2024-11-16T14:36:29.512Z        INFO    cmd/run.go:59   main network id: 0      {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.512Z        INFO    cmd/run.go:64   l2 network id: 0        {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.542Z        DEBUG   cmd/run.go:102  trusted sequencer URL http://172.18.39.103:8123 {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.542Z        INFO    server/server.go:93     gRPC Server is serving at 9090  {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.542Z        INFO    server/server.go:166    Restful Server is serving at 8080       {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.548Z        INFO    synchronizer/synchronizer.go:104        NetworkID: 0, Synchronization started   {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.549Z        DEBUG   synchronizer/synchronizer.go:119        NetworkID: 0, initial lastBlockSynced: &{ID:3 BlockNumber:7089137 BlockHash:0xbcb0638af2a8014b561ee72c2f568bda819039ca93f16c7de22896fc1037c1a3 ParentHash:0x919c0185dfa6dbc92fb9e13f535c09130d04d1f980749b952b057ffed68f77b6 NetworkID:0 GlobalExitRoots:[] Deposits:[] Claims:[] Tokens:[] VerifiedBatches:[] ActivateEtrog:[] ReceivedAt:2024-11-16 13:48:00 +0000 UTC}     {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.549Z        DEBUG   synchronizer/synchronizer.go:126        NetworkID: 0, syncing...        {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.550Z        INFO    synchronizer/synchronizer.go:543        NetworkID: 0, [checkReorg function] Checking Block 7089137 in L1        {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:36:29.553Z        ERROR   synchronizer/synchronizer.go:546        networkID: 0, error getting latest block synced from blockchain. Block: 7089137, error: Not found%!(EXTRA string=
/src/log/log.go:142 github.com/0xPolygonHermez/zkevm-bridge-service/log.appendStackTraceMaybeArgs()
/src/log/log.go:251 github.com/0xPolygonHermez/zkevm-bridge-service/log.Errorf()
/src/synchronizer/synchronizer.go:546 github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).checkReorg()
/src/synchronizer/synchronizer.go:239 github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).syncBlocks()
/src/synchronizer/synchronizer.go:128 github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).Sync()
/src/cmd/run.go:245 main.runSynchronizer()
)       {"pid": 1, "version": "v0.6.0-RC1"}
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).checkReorg
        /src/synchronizer/synchronizer.go:546
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).syncBlocks
        /src/synchronizer/synchronizer.go:239
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).Sync
        /src/synchronizer/synchronizer.go:128
main.runSynchronizer
        /src/cmd/run.go:245
2024-11-16T14:36:29.553Z        ERROR   synchronizer/synchronizer.go:241        networkID: 0, error checking reorgs. Retrying... Err: Not found {"pid": 1, "version": "v0.6.0-RC1"}
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).syncBlocks
        /src/synchronizer/synchronizer.go:241
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).Sync
        /src/synchronizer/synchronizer.go:128
main.runSynchronizer
        /src/cmd/run.go:245
2024-11-16T14:36:29.553Z        WARN    synchronizer/synchronizer.go:129        networkID: 0, error syncing blocks: networkID: 0, error checking reorgs {"pid": 1, "version": "v0.6.0-RC1"}
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).Sync
        /src/synchronizer/synchronizer.go:129
main.runSynchronizer
        /src/cmd/run.go:245
2024-11-16T14:36:29.554Z        FATAL   synchronizer/synchronizer.go:161        networkID: 0, error: latest Synced BlockNumber (7089137) is higher than the latest Proposed block (1008) in the network {"pid": 1, "version": "v0.6.0-RC1"}
github.com/0xPolygonHermez/zkevm-bridge-service/synchronizer.(*ClientSynchronizer).Sync
        /src/synchronizer/synchronizer.go:161
main.runSynchronizer
        /src/cmd/run.go:245
2024-11-16T14:37:29.896Z        INFO    cmd/run.go:26   Starting application    {"pid": 1, "version": "v0.6.0-RC1", "gitRevision": "002f436", "gitBranch": "HEAD", "goVersion": "go1.21.13", "built": "Thu, 26 Sep 2024 10:05:53 +0000", "os/arch": "linux/amd64"}
2024-11-16T14:37:29.915Z        INFO    pgstorage/utils.go:63   successfully ran 0 migrations   {"pid": 1, "version": "v0.6.0-RC1"}
2024-11-16T14:37:29.923Z        WARN    etherman/etherman.go:187        Claim compressor Address not configured {"pid": 1, "version": "v0.6.0-RC1"}
github.com/0xPolygonHermez/zkevm-bridge-service/etherman.NewL2Client
        /src/etherman/etherman.go:187
main.newEthermans
        /src/cmd/run.go:230
main.start
        /src/cmd/run.go:52
github.com/urfave/cli/v2.(*Command).Run
        /go/pkg/mod/github.com/urfave/cli/v2@v2.27.4/command.go:276
github.com/urfave/cli/v2.(*Command).Run
        /go/pkg/mod/github.com/urfave/cli/v2@v2.27.4/command.go:269
github.com/urfave/cli/v2.(*App).RunContext
        /go/pkg/mod/github.com/urfave/cli/v2@v2.27.4/app.go:333
github.com/urfave/cli/v2.(*App).Run
        /go/pkg/mod/github.com/urfave/cli/v2@v2.27.4/app.go:307
main.main
        /src/cmd/main.go:56
runtime.main
        /usr/local/go/src/runtime/proc.go:267
func (s *ClientSynchronizer) Sync() error {
    lastBlockSynced, err := s.storage.GetLastBlock(s.ctx, s.networkID, nil)
    ....
    if !s.synced {
                // Check latest Block
                header, err := s.etherMan.HeaderByNumber(s.ctx, nil)
                if err != nil {
                    log.Warnf("networkID: %d, error getting latest block from. Error: %s", s.networkID, err.Error())
                    continue
                }
                lastKnownBlock := header.Number.Uint64()
                if lastBlockSynced.BlockNumber == lastKnownBlock && !s.synced {
                    log.Infof("NetworkID %d Synced!", s.networkID)
                    waitDuration = s.cfg.SyncInterval.Duration
                    s.synced = true
                    s.chSynced <- s.networkID
                }
                if lastBlockSynced.BlockNumber > lastKnownBlock {
                    if s.networkID == 0 {
                        log.Fatalf("networkID: %d, error: latest Synced BlockNumber (%d) is higher than the latest Proposed block (%d) in the network", s.networkID, lastBlockSynced.BlockNumber, lastKnownBlock)
                    } 

lastBlockSynced.BlockNumber = 7089137(s.storage.GetLastBlock 按 networkID 0 查到的最新已同步区块)
lastKnownBlock = 1008(s.etherMan.HeaderByNumber 返回的链上最新区块)

lastBlockSynced为L1数据,lastKnownBlock为L2数据,说明s.etherMan应使用L1 RPC,目前使用了L2RPC导致出错
s.storage.GetLastBlock查询时有networkID区分,所以被对比数据是正确的
s.networkID等于0, s.etherMan使用的L2 RPC, 也就是networkID与etherMan对应关系乱了

// runSynchronizer builds a synchronizer from the supplied dependencies and then
// calls its Sync method. A failure while constructing the synchronizer, or an
// error returned by Sync, is reported through log.Fatal.
func runSynchronizer(ctx context.Context, genBlockNumber uint64, brdigeCtrl *bridgectrl.BridgeController, etherman *etherman.Client, cfg synchronizer.Config, storage db.Storage, zkEVMClient *client.Client, chExitRootEventL2 chan *etherman.GlobalExitRoot, chsExitRootEvent []chan *etherman.GlobalExitRoot, chSynced chan uint, allNetworkIDs []uint) {
    syncer, err := synchronizer.NewSynchronizer(ctx, storage, brdigeCtrl, etherman, zkEVMClient, genBlockNumber, chExitRootEventL2, chsExitRootEvent, chSynced, cfg, allNetworkIDs)
    if err != nil {
        log.Fatal(err)
    }

    // Sync only comes back with an error when synchronization cannot go on.
    err = syncer.Sync()
    if err != nil {
        log.Fatal(err)
    }
}
// Start one synchronizer goroutine per L2 etherman client. The i-th client is
// paired with the i-th entry of c.Etherman.L2URLs.
for i, l2EthermanClient := range l2Ethermans {
        log.Debug("trusted sequencer URL ", c.Etherman.L2URLs[i])
        // JSON-RPC client for the same L2 URL, handed to this L2 synchronizer.
        zkEVMClient := client.NewClient(c.Etherman.L2URLs[i])
        // Per-L2 channels: one for global exit root events, one for the "synced" signal.
        chExitRootEventL2 := make(chan *etherman.GlobalExitRoot)
        chSyncedL2 := make(chan uint)
        // Collected so that the L1 synchronizer below receives every L2 exit-root channel.
        chsExitRootEvent = append(chsExitRootEvent, chExitRootEventL2)
        chsSyncedL2 = append(chsSyncedL2, chSyncedL2)
        // L2 synchronizers start from block 0, get no exit-root channel list and an empty network ID list.
        go runSynchronizer(ctx.Context, 0, bridgeController, l2EthermanClient, c.Synchronizer, storage, zkEVMClient, chExitRootEventL2, nil, chSyncedL2, []uint{})
    }
    // The L1 synchronizer uses l1Etherman, starts from the configured genesis block,
    // has no zkEVM client or L2 exit-root channel, and receives all L2 exit-root channels.
    chSynced := make(chan uint)
    go runSynchronizer(ctx.Context, c.NetworkConfig.GenBlockNumber, bridgeController, l1Etherman, c.Synchronizer, storage, nil, nil, chsExitRootEvent, chSynced, networkIDs)
// newEthermans creates the L1 etherman client and one L2 etherman client per
// configured L2 bridge address.
//
// The i-th entry of c.L2PolygonBridgeAddresses is paired with the i-th entry
// of c.Etherman.L2URLs; a length mismatch between the two is reported through
// log.Fatal. It returns the L1 client, the L2 clients in configuration order,
// and the first error encountered. When an L2 client fails to build, the
// already created L1 client is still returned alongside the error.
func newEthermans(c *config.Config) (*etherman.Client, []*etherman.Client, error) {
    l1Etherman, err := etherman.NewClient(
        c.Etherman,
        c.NetworkConfig.PolygonBridgeAddress,
        c.NetworkConfig.PolygonZkEVMGlobalExitRootAddress,
        c.NetworkConfig.PolygonRollupManagerAddress,
    )
    if err != nil {
        log.Error("L1 etherman error: ", err)
        return nil, nil, err
    }

    // Every bridge address needs a matching node URL, otherwise indexing below is unsafe.
    if len(c.L2PolygonBridgeAddresses) != len(c.Etherman.L2URLs) {
        log.Fatal("environment configuration error. zkevm bridge addresses and zkevm node urls mismatch")
    }

    var l2Ethermans []*etherman.Client
    for idx, bridgeAddr := range c.L2PolygonBridgeAddresses {
        l2URL := c.Etherman.L2URLs[idx]
        l2Client, l2Err := etherman.NewL2Client(l2URL, bridgeAddr, c.NetworkConfig.L2ClaimCompressorAddress)
        if l2Err != nil {
            log.Error("L2 etherman ", idx, l2URL, ", error: ", l2Err)
            return l1Etherman, nil, l2Err
        }
        l2Ethermans = append(l2Ethermans, l2Client)
    }

    return l1Etherman, l2Ethermans, nil
}

对于 l1Etherman,NewClient 中没有给 networkID 赋值(日志字段也直接写死为 0),所以其 networkID 等于 0:

func NewClient(cfg Config, polygonBridgeAddr, polygonZkEVMGlobalExitRootAddress, polygonRollupManagerAddress common.Address) (*Client, error) {
    logger := log.WithFields("networkID", 0)
    // Connect to ethereum node
    ethClient, err := ethclient.Dial(cfg.L1URL)

对于l2Etherman,经过NewL2Client

// NewL2Client creates an etherman Client bound to an L2 network.
//
// It dials the node at url, instantiates the bridge bindings (current and old
// ABI) at polygonBridgeAddr, instantiates the claim compressor binding when
// claimCompressorAddress is not the zero address, and reads the network ID
// from the bridge contract over the same connection.
//
// If any step after the dial fails, the RPC connection is closed before the
// error is returned, so a failed construction does not leak it. A network ID
// of 0 is still accepted, but a warning is logged because 0 is the ID used
// for L1.
func NewL2Client(url string, polygonBridgeAddr, claimCompressorAddress common.Address) (*Client, error) {
    // Connect to ethereum node
    ethClient, err := ethclient.Dial(url)
    if err != nil {
        log.Errorf("error connecting to %s: %+v", url, err)
        return nil, err
    }
    // The returned Client owns ethClient only on success; release it on every
    // error path below.
    built := false
    defer func() {
        if !built {
            ethClient.Close()
        }
    }()

    // Create smc clients
    bridge, err := polygonzkevmbridge.NewPolygonzkevmbridge(polygonBridgeAddr, ethClient)
    if err != nil {
        return nil, err
    }
    oldpolygonBridge, err := oldpolygonzkevmbridge.NewOldpolygonzkevmbridge(polygonBridgeAddr, ethClient)
    if err != nil {
        return nil, err
    }
    var claimCompressor *claimcompressor.Claimcompressor
    if claimCompressorAddress == (common.Address{}) {
        log.Warn("Claim compressor Address not configured")
    } else {
        log.Infof("Grouping claims allowed, claimCompressor=%s", claimCompressorAddress.String())
        claimCompressor, err = claimcompressor.NewClaimcompressor(claimCompressorAddress, ethClient)
        if err != nil {
            log.Errorf("error creating claimCompressor: %+v", err)
            return nil, err
        }
    }
    networkID, err := bridge.NetworkID(&bind.CallOpts{Pending: false}) // read from the bridge contract
    if err != nil {
        return nil, err
    }
    if networkID == 0 {
        // An L2 client that reports 0 would share its network ID with the L1
        // client. Surface it here instead of failing later in the synchronizer.
        log.Warnf("L2 bridge %s at %s reports networkID 0, which is the ID used for L1; check that the bridge contract has been initialized", polygonBridgeAddr.String(), url)
    }
    scAddresses := []common.Address{polygonBridgeAddr}
    logger := log.WithFields("networkID", networkID)

    built = true
    return &Client{
        logger:           logger,
        EtherClient:      ethClient,
        PolygonBridge:    bridge,
        OldPolygonBridge: oldpolygonBridge,
        SCAddresses:      scAddresses,
        ClaimCompressor:  claimCompressor,
        NetworkID:        networkID,
    }, nil
}

对于L2 会使用对应的L2 RPC从二层桥合约读取对应的NetworkID

// NetworkID calls the bridge contract's `networkID` method and returns the
// result as a uint32. On a call error it returns 0 together with that error.
//
// Solidity: function networkID() view returns(uint32)
func (_Polygonzkevmbridge *PolygonzkevmbridgeCaller) NetworkID(opts *bind.CallOpts) (uint32, error) {
    var results []interface{}
    if callErr := _Polygonzkevmbridge.contract.Call(opts, &results, "networkID"); callErr != nil {
        return 0, callErr
    }

    // The method has a single return value; convert it to the Go type.
    networkID := *abi.ConvertType(results[0], new(uint32)).(*uint32)
    return networkID, nil
}

查询二层networkID,确实是0
目前的问题是二层桥合约的 networkID 不正确(为 0,与 L1 的 networkID 相同),导致 networkID 与 etherMan 的对应关系错乱。下面进一步分析二层 networkID 是在什么时候设置的,以及为何会出错:
经过确认,PolygonZkEVMBridgeV2合约没有初始化

结论

经过确认和重置后的对比测试,该错误的原因是:部署时 RPC 不稳定,造成交易丢失,PolygonZkEVMBridgeV2 合约没有被初始化,因此 L2 的 networkID 一直是默认值 0。所以部署时应尽量选择稳定的 RPC,并在对应链相对空闲的时段部署;部署完成后还要检查 L2 的 networkID 是否为 1(而不是 0)。