From 7dbe7c6a22110e184d27577989b941715b6f9d94 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 26 Jun 2024 17:32:42 +0800 Subject: [PATCH 01/29] initial submit of disconnect zombie node --- .../java/org/tron/core/ChainBaseManager.java | 4 + common/build.gradle | 2 +- .../common/parameter/CommonParameter.java | 6 + .../common/parameter/ResilienceConfig.java | 29 ++++ .../src/main/java/org/tron/core/Constant.java | 7 + .../java/org/tron/core/config/args/Args.java | 32 +++++ .../main/java/org/tron/core/db/Manager.java | 1 + .../org/tron/core/net/TronNetService.java | 6 + .../net/messagehandler/BlockMsgHandler.java | 1 + .../ChainInventoryMsgHandler.java | 4 + .../SyncBlockChainMsgHandler.java | 6 + .../tron/core/net/peer/PeerConnection.java | 84 +++++++++++ .../service/effective/ResilienceService.java | 133 ++++++++++++++++++ protocol/src/main/protos/core/Tron.proto | 1 + 14 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 common/src/main/java/org/tron/common/parameter/ResilienceConfig.java create mode 100644 framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java diff --git a/chainbase/src/main/java/org/tron/core/ChainBaseManager.java b/chainbase/src/main/java/org/tron/core/ChainBaseManager.java index e43d442534a..859c425e51e 100644 --- a/chainbase/src/main/java/org/tron/core/ChainBaseManager.java +++ b/chainbase/src/main/java/org/tron/core/ChainBaseManager.java @@ -244,6 +244,10 @@ public class ChainBaseManager { @Setter private long lowestBlockNum = -1; // except num = 0. 
+ @Getter + @Setter + private long latestSaveBlockTime = System.currentTimeMillis(); + // for test only public List getWitnesses() { return witnessScheduleStore.getActiveWitnesses(); diff --git a/common/build.gradle b/common/build.gradle index 6c1545e5d13..9f491b533bd 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -46,7 +46,7 @@ dependencies { compile 'org.aspectj:aspectjrt:1.8.13' compile 'org.aspectj:aspectjweaver:1.8.13' compile 'org.aspectj:aspectjtools:1.8.13' - compile group: 'io.github.tronprotocol', name: 'libp2p', version: '2.2.1',{ + compile group: 'com.github.317787106', name: 'libp2p', version: 'v0.0.4',{ exclude group: 'io.grpc', module: 'grpc-context' exclude group: 'io.grpc', module: 'grpc-core' exclude group: 'io.grpc', module: 'grpc-netty' diff --git a/common/src/main/java/org/tron/common/parameter/CommonParameter.java b/common/src/main/java/org/tron/common/parameter/CommonParameter.java index 22159063333..bd2baab923d 100644 --- a/common/src/main/java/org/tron/common/parameter/CommonParameter.java +++ b/common/src/main/java/org/tron/common/parameter/CommonParameter.java @@ -144,6 +144,9 @@ public class CommonParameter { public boolean nodeEffectiveCheckEnable; @Getter @Setter + public ResilienceConfig resilienceConfig; + @Getter + @Setter public int nodeConnectionTimeout; @Getter @Setter @@ -333,6 +336,9 @@ public class CommonParameter { public boolean isOpenFullTcpDisconnect; @Getter @Setter + public int peerNoBlockTime; + @Getter + @Setter public boolean nodeDetectEnable; @Getter @Setter diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java new file mode 100644 index 00000000000..a756d077a3a --- /dev/null +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -0,0 +1,29 @@ +package org.tron.common.parameter; + +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j(topic = 
"net") +public class ResilienceConfig { + @Getter + @Setter + private boolean enabled = false; + + @Getter + @Setter + private int checkInterval = 60; + + @Getter + @Setter + private int zombieThreshold = 300; + + @Getter + @Setter + private int blockNotChangeTime = 300; + + @Getter + @Setter + private int disconnectNumber = 1; + +} diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index 0e634d3ef7d..3b8fa36b865 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ b/common/src/main/java/org/tron/core/Constant.java @@ -124,6 +124,12 @@ public class Constant { public static final String NODE_DNS_AWS_REGION = "node.dns.awsRegion"; public static final String NODE_DNS_AWS_HOST_ZONE_ID = "node.dns.awsHostZoneId"; + public static final String NODE_RESILIENCE_ENABLE = "node.resilience.enable"; + public static final String NODE_RESILIENCE_CHECK_INTERVAL = "node.resilience.checkInterval"; + public static final String NODE_RESILIENCE_ZOMBIE_THRESHOLD = "node.resilience.zombieThreshold"; + public static final String NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME = "node.resilience.blockNotChangeTime"; + public static final String NODE_RESILIENCE_DISCONNECT_NUMBER = "node.resilience.disconnectNumber"; + public static final String NODE_RPC_PORT = "node.rpc.port"; public static final String NODE_RPC_SOLIDITY_PORT = "node.rpc.solidityPort"; public static final String NODE_RPC_PBFT_PORT = "node.rpc.PBFTPort"; @@ -197,6 +203,7 @@ public class Constant { public static final String NODE_RECEIVE_TCP_MIN_DATA_LENGTH = "node.receiveTcpMinDataLength"; public static final String NODE_IS_OPEN_FULL_TCP_DISCONNECT = "node.isOpenFullTcpDisconnect"; + public static final String NODE_PEER_NO_BLOCK_TIME = "node.peerNoBlockTime"; public static final String NODE_DETECT_ENABLE = "node.nodeDetectEnable"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java 
b/framework/src/main/java/org/tron/core/config/args/Args.java index 422efefaed8..d2afc85dc7b 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -56,6 +56,7 @@ import org.tron.common.logsfilter.trigger.ContractLogTrigger; import org.tron.common.parameter.CommonParameter; import org.tron.common.parameter.RateLimiterInitialization; +import org.tron.common.parameter.ResilienceConfig; import org.tron.common.setting.RocksDbSettings; import org.tron.common.utils.ByteArray; import org.tron.common.utils.Commons; @@ -172,6 +173,7 @@ public static void clearParam() { PARAMETER.estimateEnergyMaxRetry = 3; PARAMETER.receiveTcpMinDataLength = 2048; PARAMETER.isOpenFullTcpDisconnect = false; + PARAMETER.peerNoBlockTime = 300_000; PARAMETER.nodeDetectEnable = false; PARAMETER.supportConstant = false; PARAMETER.debug = false; @@ -569,6 +571,8 @@ public static void setParam(final String[] args, final String confFileName) { config.hasPath(Constant.NODE_EFFECTIVE_CHECK_ENABLE) && config.getBoolean(Constant.NODE_EFFECTIVE_CHECK_ENABLE); + PARAMETER.resilienceConfig = loadResilienceConfig(config); + PARAMETER.nodeConnectionTimeout = config.hasPath(Constant.NODE_CONNECTION_TIMEOUT) ? config.getInt(Constant.NODE_CONNECTION_TIMEOUT) * 1000 @@ -841,6 +845,8 @@ public static void setParam(final String[] args, final String confFileName) { PARAMETER.isOpenFullTcpDisconnect = config.hasPath(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT) && config.getBoolean(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT); + PARAMETER.peerNoBlockTime = config.hasPath(Constant.NODE_PEER_NO_BLOCK_TIME) + ? 
config.getInt(Constant.NODE_PEER_NO_BLOCK_TIME) : 300_000; PARAMETER.nodeDetectEnable = config.hasPath(Constant.NODE_DETECT_ENABLE) && config.getBoolean(Constant.NODE_DETECT_ENABLE); @@ -1483,6 +1489,32 @@ private static void logEmptyError(String arg) { throw new IllegalArgumentException(String.format("Check %s, must not be null or empty", arg)); } + private static ResilienceConfig loadResilienceConfig(final com.typesafe.config.Config config) { + ResilienceConfig resilienceConfig = new ResilienceConfig(); + if (config.hasPath(Constant.NODE_RESILIENCE_ENABLE)) { + resilienceConfig.setEnabled(config.getBoolean(Constant.NODE_RESILIENCE_ENABLE)); + } + if (resilienceConfig.isEnabled()) { + if (config.hasPath(Constant.NODE_RESILIENCE_CHECK_INTERVAL)) { + resilienceConfig.setCheckInterval(config.getInt(Constant.NODE_RESILIENCE_CHECK_INTERVAL)); + } + if (config.hasPath(Constant.NODE_RESILIENCE_ZOMBIE_THRESHOLD)) { + resilienceConfig.setZombieThreshold( + config.getInt(Constant.NODE_RESILIENCE_ZOMBIE_THRESHOLD)); + } + if (config.hasPath(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME)) { + resilienceConfig.setBlockNotChangeTime( + config.getInt(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME)); + } + if (config.hasPath(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)) { + resilienceConfig.setDisconnectNumber( + config.getInt(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)); + } + } + + return resilienceConfig; + } + private static TriggerConfig createTriggerConfig(ConfigObject triggerObject) { if (Objects.isNull(triggerObject)) { return null; diff --git a/framework/src/main/java/org/tron/core/db/Manager.java b/framework/src/main/java/org/tron/core/db/Manager.java index ef2f5c81124..66aeccdda39 100644 --- a/framework/src/main/java/org/tron/core/db/Manager.java +++ b/framework/src/main/java/org/tron/core/db/Manager.java @@ -1384,6 +1384,7 @@ public void updateDynamicProperties(BlockCapsule block) { (chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderNumber() - 
chainBaseManager.getDynamicPropertiesStore().getLatestSolidifiedBlockNum() + 1)); + chainBaseManager.setLatestSaveBlockTime(System.currentTimeMillis()); Metrics.gaugeSet(MetricKeys.Gauge.HEADER_HEIGHT, block.getNum()); Metrics.gaugeSet(MetricKeys.Gauge.HEADER_TIME, block.getTimeStamp()); } diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index 03becf5d4e9..bda2d226365 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -22,6 +22,7 @@ import org.tron.core.net.peer.PeerStatusCheck; import org.tron.core.net.service.adv.AdvService; import org.tron.core.net.service.effective.EffectiveCheckService; +import org.tron.core.net.service.effective.ResilienceService; import org.tron.core.net.service.fetchblock.FetchBlockService; import org.tron.core.net.service.nodepersist.NodePersistService; import org.tron.core.net.service.relay.RelayService; @@ -73,6 +74,9 @@ public class TronNetService { @Autowired private EffectiveCheckService effectiveCheckService; + @Autowired + private ResilienceService resilienceService; + private volatile boolean init; private static void setP2pConfig(P2pConfig config) { @@ -95,6 +99,7 @@ public void start() { PeerManager.init(); relayService.init(); effectiveCheckService.init(); + resilienceService.init(); logger.info("Net service start successfully"); } catch (Exception e) { logger.error("Net service start failed", e); @@ -178,6 +183,7 @@ private P2pConfig updateConfig(P2pConfig config) { config.setPort(parameter.getNodeListenPort()); config.setNetworkId(parameter.getNodeP2pVersion()); config.setDisconnectionPolicyEnable(parameter.isOpenFullTcpDisconnect()); + config.setNotActiveInterval(parameter.peerNoBlockTime); config.setNodeDetectEnable(parameter.isNodeDetectEnable()); config.setDiscoverEnable(parameter.isNodeDiscoveryEnable()); if 
(StringUtils.isEmpty(config.getIp()) && hasIpv4Stack(NetUtil.getAllLocalAddress())) { diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/BlockMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/BlockMsgHandler.java index 926ed1a01ca..fec6a8b017a 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/BlockMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/BlockMsgHandler.java @@ -77,6 +77,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep check(peer, blockMessage); } + peer.getChannel().setLastActiveTime(System.currentTimeMillis()); if (peer.getSyncBlockRequested().containsKey(blockId)) { peer.getSyncBlockRequested().remove(blockId); peer.getSyncBlockInProcess().add(blockId); diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java index bd2e428418c..96688366820 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java @@ -51,8 +51,12 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep Deque blockIdWeGet = new LinkedList<>(chainInventoryMessage.getBlockIds()); if (blockIdWeGet.size() == 1 && tronNetDelegate.containBlock(blockIdWeGet.peek())) { + if (blockIdWeGet.peek().getNum() < peer.getHelloMessageReceive().getSolidBlockId().getNum()) { + peer.getMaliciousFeature().updateBadFeature1(); + } peer.setTronState(TronState.SYNC_COMPLETED); peer.setNeedSyncFromPeer(false); + peer.updateAdvStartTime(); return; } diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java index 958ebfe5561..f60421e6016 100644 --- 
a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java @@ -37,6 +37,11 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep long remainNum = 0; List summaryChainIds = syncBlockChainMessage.getBlockIds(); + if (peer.isNeedSyncFromUs() && summaryChainIds.size() == 1 && summaryChainIds.get(0).getNum() + == peer.getHelloMessageReceive().getHeadBlockId().getNum()) { + peer.getMaliciousFeature().updateBadFeature2(); + } + BlockId headID = tronNetDelegate.getHeadBlockId(); LinkedList blockIds = getLostBlockIds(summaryChainIds, headID); @@ -46,6 +51,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep return; } else if (blockIds.size() == 1) { peer.setNeedSyncFromUs(false); + peer.updateAdvStartTime(); } else { peer.setNeedSyncFromUs(true); remainNum = headID.getNum() - blockIds.peekLast().getNum(); diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 4fac50b82c7..71e124e931d 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -5,6 +5,9 @@ import com.google.protobuf.ByteString; import java.net.InetAddress; import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; @@ -21,6 +24,7 @@ import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Component; import org.tron.common.overlay.message.Message; +import org.tron.common.parameter.CommonParameter; import org.tron.common.prometheus.MetricKeys; import org.tron.common.prometheus.Metrics; import org.tron.common.utils.Pair; @@ -33,6 +37,7 @@ import 
org.tron.core.metrics.MetricsKey; import org.tron.core.metrics.MetricsUtil; import org.tron.core.net.TronNetDelegate; +import org.tron.core.net.message.adv.BlockMessage; import org.tron.core.net.message.adv.InventoryMessage; import org.tron.core.net.message.adv.TransactionsMessage; import org.tron.core.net.message.base.DisconnectMessage; @@ -75,6 +80,16 @@ public class PeerConnection { @Getter private volatile boolean isBadPeer; + @Getter + private final MaliciousFeature maliciousFeature = new MaliciousFeature(); + + @Getter + private long advStartTime = -1; + + @Getter + private final long zombieThreshold = Args.getInstance().getResilienceConfig() + .getZombieThreshold(); + @Getter @Setter private ByteString address; @@ -166,6 +181,12 @@ public void setBlockBothHave(BlockId blockId) { this.blockBothHaveUpdateTime = System.currentTimeMillis(); } + public void updateAdvStartTime() { + if (!needSyncFromPeer && !needSyncFromUs) { + this.advStartTime = System.currentTimeMillis(); + } + } + public boolean isIdle() { return advInvRequest.isEmpty() && syncBlockRequested.isEmpty() && syncChainRequested == null; } @@ -175,6 +196,9 @@ public void sendMessage(Message message) { logger.info("Send peer {} message {}", channel.getInetSocketAddress(), message); } channel.send(message.getSendBytes()); + if (message instanceof BlockMessage) { + this.channel.setLastActiveTime(System.currentTimeMillis()); + } peerStatistics.messageStatistics.addTcpOutMessage(message); } @@ -192,6 +216,7 @@ public void onConnect() { } setTronState(TronState.SYNC_COMPLETED); } + updateAdvStartTime(); } public void onDisconnect() { @@ -304,6 +329,65 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { return true; } + + @Getter + public class MaliciousFeature { + + private boolean hasBadSyncBlockChain = false; + private long badSyncBlockChainTime; + private boolean hasBadChainInventory = false; + private long badChainInventoryTime; + private boolean isZombie = false; + private 
long zombieBeginTime; + + //it can only be set from false to true + public void updateBadFeature1() { + if (!hasBadSyncBlockChain) { + hasBadSyncBlockChain = true; + badSyncBlockChainTime = System.currentTimeMillis(); + } + } + + //it can only be set from false to true + public void updateBadFeature2() { + if (!hasBadChainInventory) { + hasBadChainInventory = true; + badChainInventoryTime = System.currentTimeMillis(); + } + } + + // if peer is in adv status and no block received and sent between us for too long, + // it is a zombie + public void updateBadFeature3() { + isZombie = false; + if (!needSyncFromPeer && !needSyncFromUs + && System.currentTimeMillis() - Math.max(channel.getLastActiveTime(), advStartTime) + > zombieThreshold * 1000) { + this.isZombie = true; + this.zombieBeginTime = Math.max(channel.getLastActiveTime(), advStartTime); + } + } + + public long getOldestTime() { + List times = new ArrayList<>(); + if (hasBadSyncBlockChain) { + times.add(badSyncBlockChainTime); + } + if (hasBadChainInventory) { + times.add(badChainInventoryTime); + } + if (isZombie) { + times.add(zombieBeginTime); + } + return Collections.min(times); + } + } + + public boolean isMalicious() { + return maliciousFeature.hasBadSyncBlockChain || maliciousFeature.hasBadChainInventory + || maliciousFeature.isZombie; + } + @Override public boolean equals(Object o) { if (!(o instanceof PeerConnection)) { diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java new file mode 100644 index 00000000000..e305b3169e4 --- /dev/null +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -0,0 +1,133 @@ +package org.tron.core.net.service.effective; + +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import 
java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; +import org.tron.common.es.ExecutorServiceManager; +import org.tron.common.parameter.CommonParameter; +import org.tron.common.parameter.ResilienceConfig; +import org.tron.core.ChainBaseManager; +import org.tron.core.net.TronNetDelegate; +import org.tron.core.net.peer.PeerConnection; +import org.tron.protos.Protocol.ReasonCode; + +@Slf4j(topic = "net") +@Component +public class ResilienceService { + + private final ResilienceConfig resilienceConfig = CommonParameter.getInstance() + .getResilienceConfig(); + + @Autowired + private TronNetDelegate tronNetDelegate; + + @Autowired + private ChainBaseManager chainBaseManager; + + private final String esName = "resilience-service"; + private ScheduledExecutorService executor; + + + public void init() { + if (resilienceConfig.isEnabled()) { + executor = ExecutorServiceManager.newSingleThreadScheduledExecutor(esName); + executor.scheduleWithFixedDelay(() -> { + try { + resilienceNode(); + } catch (Exception e) { + logger.error("Resilience node failed", e); + } + }, 5 * 60, resilienceConfig.getCheckInterval(), TimeUnit.SECONDS); + } else { + logger.info("ResilienceService is disabled"); + } + } + + + public void resilienceNode() { + //update peers' bad feature 3 at first + tronNetDelegate.getActivePeer() + .forEach(peer -> peer.getMaliciousFeature().updateBadFeature3()); + + int peerSize = tronNetDelegate.getActivePeer().size(); + int activePeerSize = (int) tronNetDelegate.getActivePeer().stream() + .filter(peer -> peer.getChannel().isActive()) + .count(); + int findCount = 0; + + //1. if local node belongs to a lan network, disconnect with first malicious node if necessary + if (peerSize == activePeerSize && peerSize >= CommonParameter.getInstance().minConnections) { + findCount = findAndDisconnect() ? 1 : 0; + } + + //2. 
if local node's latestSaveBlockTime has not changed more than several minutes, + // it is isolated, we need to disconnect with some peers + if (findCount == 0) { + int advPeerCount = (int) tronNetDelegate.getActivePeer().stream() + .filter(peer -> !peer.isNeedSyncFromPeer() && !peer.isNeedSyncFromUs()) + .count(); + if (advPeerCount >= 1 && peerSize >= CommonParameter.getInstance().minConnections + && System.currentTimeMillis() - chainBaseManager.getLatestSaveBlockTime() + >= resilienceConfig.getBlockNotChangeTime() * 1000L) { + + //prefer to disconnect with active peer. if all are same, choose the oldest + List peerList = tronNetDelegate.getActivePeer().stream() + .filter(peer -> !peer.isDisconnect()) + .filter(peer -> !peer.getChannel().isTrustPeer()) + .filter(PeerConnection::isMalicious) + .sorted((o1, o2) -> { + if (o1.getChannel().isActive() && !o2.getChannel().isActive()) { + return -1; + } else if (!o1.getChannel().isActive() && o2.getChannel().isActive()) { + return 1; + } else { + return Long.compare(o1.getMaliciousFeature().getOldestTime(), + o2.getMaliciousFeature().getOldestTime()); + } + }) + .collect(Collectors.toList()); + + //choose most disconnectNumber peer + if (peerList.size() >= resilienceConfig.getDisconnectNumber()) { + peerList = peerList.subList(0, resilienceConfig.getDisconnectNumber()); + } + if (!peerList.isEmpty()) { + peerList.forEach(p -> p.disconnect(ReasonCode.MALICIOUS_NODE)); + findCount = peerList.size(); + } + } + } + + //3. if peers' number is equal or larger than maxConnections, disconnect with oldest peer + if (findCount == 0 && peerSize >= CommonParameter.getInstance().maxConnections) { + findCount = findAndDisconnect() ? 
1 : 0; + } + + logger.info("Disconnect with {} malicious peer", findCount); + } + + private boolean findAndDisconnect() { + Optional p = tronNetDelegate.getActivePeer().stream() + .filter(peer -> !peer.isDisconnect()) + .filter(peer -> !peer.getChannel().isTrustPeer()) + .filter(PeerConnection::isMalicious) + .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getOldestTime(), + Long::compareTo)); + + if (p.isPresent()) { + p.get().disconnect(ReasonCode.MALICIOUS_NODE); + return true; + } + return false; + } + + public void close() { + ExecutorServiceManager.shutdownAndAwaitTermination(executor, esName); + } +} diff --git a/protocol/src/main/protos/core/Tron.proto b/protocol/src/main/protos/core/Tron.proto index 2ffefbf9f3e..805a0ed2e8a 100644 --- a/protocol/src/main/protos/core/Tron.proto +++ b/protocol/src/main/protos/core/Tron.proto @@ -604,6 +604,7 @@ enum ReasonCode { BELOW_THAN_ME = 0X24; NOT_WITNESS = 0x25; NO_SUCH_MESSAGE = 0x26; + MALICIOUS_NODE = 0X27; UNKNOWN = 0xFF; } From 53dab13145e168cde89ec40d94029a511766503a Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 26 Jun 2024 20:40:02 +0800 Subject: [PATCH 02/29] format MaliciousFeature --- .../org/tron/core/net/peer/PeerConnection.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 71e124e931d..4b1f94da9fd 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -246,7 +246,8 @@ public String log() { + "syncBlockRequestedSize:%d\n" + "remainNum:%d\n" + "syncChainRequested:%d\n" - + "blockInProcess:%d\n", + + "blockInProcess:%d\n" + + "feature:%s\n", channel.getInetSocketAddress(), (now - channel.getStartTime()) / Constant.ONE_THOUSAND, channel.getAvgLatency(), @@ -259,7 +260,8 @@ public String log() 
{ remainNum, requested == null ? 0 : (now - requested.getValue()) / Constant.ONE_THOUSAND, - syncBlockInProcess.size()); + syncBlockInProcess.size(), + maliciousFeature); } public boolean isSyncFinish() { @@ -293,6 +295,7 @@ private void processDisconnect(Protocol.ReasonCode reason) { case BAD_PROTOCOL: case BAD_BLOCK: case BAD_TX: + case MALICIOUS_NODE: channel.close(BAD_PEER_BAN_TIME); break; default: @@ -381,6 +384,14 @@ public long getOldestTime() { } return Collections.min(times); } + + @Override + public String toString() { + return String.format("(1:[%s][%d] 2:[%s][%d] 3:[%s][%d])", + hasBadSyncBlockChain, badSyncBlockChainTime, + hasBadChainInventory, badChainInventoryTime, + isZombie, zombieBeginTime); + } } public boolean isMalicious() { From 1b6ca450c2a5f569f5032772980a138a5e29b392 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 26 Jun 2024 21:08:29 +0800 Subject: [PATCH 03/29] rename to peerNotActiveTime --- .../java/org/tron/common/parameter/ResilienceConfig.java | 2 +- common/src/main/java/org/tron/core/Constant.java | 2 +- .../src/main/java/org/tron/core/config/args/Args.java | 6 +++--- .../main/java/org/tron/core/net/peer/PeerConnection.java | 8 +++----- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java index a756d077a3a..07782addc89 100644 --- a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -16,7 +16,7 @@ public class ResilienceConfig { @Getter @Setter - private int zombieThreshold = 300; + private int peerNotActiveTime = 300; @Getter @Setter diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index 3b8fa36b865..7e969b27505 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ 
b/common/src/main/java/org/tron/core/Constant.java @@ -126,7 +126,7 @@ public class Constant { public static final String NODE_RESILIENCE_ENABLE = "node.resilience.enable"; public static final String NODE_RESILIENCE_CHECK_INTERVAL = "node.resilience.checkInterval"; - public static final String NODE_RESILIENCE_ZOMBIE_THRESHOLD = "node.resilience.zombieThreshold"; + public static final String NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME = "node.resilience.peerNotActiveTime"; public static final String NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME = "node.resilience.blockNotChangeTime"; public static final String NODE_RESILIENCE_DISCONNECT_NUMBER = "node.resilience.disconnectNumber"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index d2afc85dc7b..8be82bbcdbc 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -1498,9 +1498,9 @@ private static ResilienceConfig loadResilienceConfig(final com.typesafe.config.C if (config.hasPath(Constant.NODE_RESILIENCE_CHECK_INTERVAL)) { resilienceConfig.setCheckInterval(config.getInt(Constant.NODE_RESILIENCE_CHECK_INTERVAL)); } - if (config.hasPath(Constant.NODE_RESILIENCE_ZOMBIE_THRESHOLD)) { - resilienceConfig.setZombieThreshold( - config.getInt(Constant.NODE_RESILIENCE_ZOMBIE_THRESHOLD)); + if (config.hasPath(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME)) { + resilienceConfig.setPeerNotActiveTime( + config.getInt(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME)); } if (config.hasPath(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME)) { resilienceConfig.setBlockNotChangeTime( diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 4b1f94da9fd..add6688eaf1 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ 
b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -6,7 +6,6 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Deque; import java.util.HashSet; @@ -24,7 +23,6 @@ import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Component; import org.tron.common.overlay.message.Message; -import org.tron.common.parameter.CommonParameter; import org.tron.common.prometheus.MetricKeys; import org.tron.common.prometheus.Metrics; import org.tron.common.utils.Pair; @@ -87,8 +85,8 @@ public class PeerConnection { private long advStartTime = -1; @Getter - private final long zombieThreshold = Args.getInstance().getResilienceConfig() - .getZombieThreshold(); + private final long peerNotActiveTime = Args.getInstance().getResilienceConfig() + .getPeerNotActiveTime(); @Getter @Setter @@ -365,7 +363,7 @@ public void updateBadFeature3() { isZombie = false; if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - Math.max(channel.getLastActiveTime(), advStartTime) - > zombieThreshold * 1000) { + > peerNotActiveTime * 1000) { this.isZombie = true; this.zombieBeginTime = Math.max(channel.getLastActiveTime(), advStartTime); } From 9e2b144e5274d22c5fecef287bd604415167d436 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 26 Jun 2024 21:35:07 +0800 Subject: [PATCH 04/29] modify default peerNotActiveTime to 600 --- .../main/java/org/tron/common/parameter/ResilienceConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java index 07782addc89..12b68413ba6 100644 --- a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -16,7 +16,7 @@ public 
class ResilienceConfig { @Getter @Setter - private int peerNotActiveTime = 300; + private int peerNotActiveTime = 600; @Getter @Setter From 1b198f88dee8d59db45f95d8ade4fe197f8e5b3e Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 27 Jun 2024 10:14:46 +0800 Subject: [PATCH 05/29] reduce log --- .../main/java/org/tron/core/config/args/Args.java | 1 + .../java/org/tron/core/net/peer/PeerConnection.java | 12 ++++++------ .../net/service/effective/ResilienceService.java | 10 ++++++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index 8be82bbcdbc..218ff2665c6 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -1682,6 +1682,7 @@ public static void logConfig() { logger.info("Open full tcp disconnect: {}", parameter.isOpenFullTcpDisconnect()); logger.info("Node detect enable: {}", parameter.isNodeDetectEnable()); logger.info("Node effective check enable: {}", parameter.isNodeEffectiveCheckEnable()); + logger.info("Node resilience check enable: {}", parameter.resilienceConfig.isEnabled()); logger.info("Rate limiter global qps: {}", parameter.getRateLimiterGlobalQps()); logger.info("Rate limiter global ip qps: {}", parameter.getRateLimiterGlobalIpQps()); logger.info("Rate limiter global api qps: {}", parameter.getRateLimiterGlobalApiQps()); diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index add6688eaf1..ebbd91163ec 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -82,7 +82,7 @@ public class PeerConnection { private final MaliciousFeature maliciousFeature = new MaliciousFeature(); @Getter - private long advStartTime 
= -1; + private long advStartTime = System.currentTimeMillis(); @Getter private final long peerNotActiveTime = Args.getInstance().getResilienceConfig() @@ -257,7 +257,7 @@ public String log() { syncBlockRequested.size(), remainNum, requested == null ? 0 : (now - requested.getValue()) - / Constant.ONE_THOUSAND, + / Constant.ONE_THOUSAND, syncBlockInProcess.size(), maliciousFeature); } @@ -332,7 +332,7 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { @Getter - public class MaliciousFeature { + public class MaliciousFeature { private boolean hasBadSyncBlockChain = false; private long badSyncBlockChainTime; @@ -361,11 +361,11 @@ public void updateBadFeature2() { // it is a zombie public void updateBadFeature3() { isZombie = false; + long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); if (!needSyncFromPeer && !needSyncFromUs - && System.currentTimeMillis() - Math.max(channel.getLastActiveTime(), advStartTime) - > peerNotActiveTime * 1000) { + && System.currentTimeMillis() - tempTime > peerNotActiveTime * 1000) { this.isZombie = true; - this.zombieBeginTime = Math.max(channel.getLastActiveTime(), advStartTime); + this.zombieBeginTime = tempTime; } } diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index e305b3169e4..98f891739a8 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -98,7 +98,10 @@ public void resilienceNode() { peerList = peerList.subList(0, resilienceConfig.getDisconnectNumber()); } if (!peerList.isEmpty()) { - peerList.forEach(p -> p.disconnect(ReasonCode.MALICIOUS_NODE)); + peerList.forEach(p -> { + logger.info("feature {}: {}", p.getInetSocketAddress(), p.getMaliciousFeature()); + p.disconnect(ReasonCode.MALICIOUS_NODE); + }); findCount = 
peerList.size(); } } @@ -109,7 +112,9 @@ public void resilienceNode() { findCount = findAndDisconnect() ? 1 : 0; } - logger.info("Disconnect with {} malicious peer", findCount); + if (findCount > 0) { + logger.info("Disconnect with {} malicious peer", findCount); + } } private boolean findAndDisconnect() { @@ -121,6 +126,7 @@ private boolean findAndDisconnect() { Long::compareTo)); if (p.isPresent()) { + logger.info("feature {}: {}", p.get().getInetSocketAddress(), p.get().getMaliciousFeature()); p.get().disconnect(ReasonCode.MALICIOUS_NODE); return true; } From 5a41c621d43f7467c2106fc4087b2186e1b9549d Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 27 Jun 2024 14:39:29 +0800 Subject: [PATCH 06/29] set type of peerNoBlockTime to seconds --- common/build.gradle | 2 +- framework/src/main/java/org/tron/core/config/args/Args.java | 4 ++-- framework/src/main/java/org/tron/core/net/TronNetService.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/build.gradle b/common/build.gradle index 9f491b533bd..c950300182a 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -46,7 +46,7 @@ dependencies { compile 'org.aspectj:aspectjrt:1.8.13' compile 'org.aspectj:aspectjweaver:1.8.13' compile 'org.aspectj:aspectjtools:1.8.13' - compile group: 'com.github.317787106', name: 'libp2p', version: 'v0.0.4',{ + compile group: 'com.github.317787106', name: 'libp2p', version: 'v0.0.5',{ exclude group: 'io.grpc', module: 'grpc-context' exclude group: 'io.grpc', module: 'grpc-core' exclude group: 'io.grpc', module: 'grpc-netty' diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index 218ff2665c6..900d75e91a1 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -173,7 +173,7 @@ public static void clearParam() { PARAMETER.estimateEnergyMaxRetry = 3; 
PARAMETER.receiveTcpMinDataLength = 2048; PARAMETER.isOpenFullTcpDisconnect = false; - PARAMETER.peerNoBlockTime = 300_000; + PARAMETER.peerNoBlockTime = 300; PARAMETER.nodeDetectEnable = false; PARAMETER.supportConstant = false; PARAMETER.debug = false; @@ -846,7 +846,7 @@ public static void setParam(final String[] args, final String confFileName) { PARAMETER.isOpenFullTcpDisconnect = config.hasPath(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT) && config.getBoolean(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT); PARAMETER.peerNoBlockTime = config.hasPath(Constant.NODE_PEER_NO_BLOCK_TIME) - ? config.getInt(Constant.NODE_PEER_NO_BLOCK_TIME) : 300_000; + ? config.getInt(Constant.NODE_PEER_NO_BLOCK_TIME) : 300; PARAMETER.nodeDetectEnable = config.hasPath(Constant.NODE_DETECT_ENABLE) && config.getBoolean(Constant.NODE_DETECT_ENABLE); diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index bda2d226365..bdebc0db879 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -183,7 +183,7 @@ private P2pConfig updateConfig(P2pConfig config) { config.setPort(parameter.getNodeListenPort()); config.setNetworkId(parameter.getNodeP2pVersion()); config.setDisconnectionPolicyEnable(parameter.isOpenFullTcpDisconnect()); - config.setNotActiveInterval(parameter.peerNoBlockTime); + config.setNotActiveInterval(parameter.peerNoBlockTime * 1000L); config.setNodeDetectEnable(parameter.isNodeDetectEnable()); config.setDiscoverEnable(parameter.isNodeDiscoveryEnable()); if (StringUtils.isEmpty(config.getIp()) && hasIpv4Stack(NetUtil.getAllLocalAddress())) { From 4af6102b025296f9e97d2a28c059f4b0c6832b90 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 27 Jun 2024 14:51:08 +0800 Subject: [PATCH 07/29] close resilienceService when TronNetService close --- 
framework/src/main/java/org/tron/core/net/TronNetService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index bdebc0db879..fdf082bb428 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -119,6 +119,7 @@ public void close() { transactionsMsgHandler.close(); fetchBlockService.close(); effectiveCheckService.close(); + resilienceService.close(); p2pService.close(); relayService.close(); logger.info("Net service closed successfully"); From d7d5aff9b6494c328aad081e0e2d55015e7995b9 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 27 Jun 2024 17:24:42 +0800 Subject: [PATCH 08/29] update feature 2 --- .../tron/core/net/messagehandler/SyncBlockChainMsgHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java index f60421e6016..211cf18ebfa 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java @@ -38,7 +38,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep List summaryChainIds = syncBlockChainMessage.getBlockIds(); if (peer.isNeedSyncFromUs() && summaryChainIds.size() == 1 && summaryChainIds.get(0).getNum() - == peer.getHelloMessageReceive().getHeadBlockId().getNum()) { + == peer.getHelloMessageSend().getHeadBlockId().getNum()) { peer.getMaliciousFeature().updateBadFeature2(); } From d7c0fef331b962d60d8c98063f0ef33ac8666541 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 27 Jun 2024 17:43:44 +0800 Subject: [PATCH 09/29] rearrange the 
close order --- .../main/java/org/tron/core/net/TronNetService.java | 2 +- .../net/service/effective/ResilienceService.java | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index fdf082bb428..ba11e7ea0a7 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -118,8 +118,8 @@ public void close() { peerStatusCheck.close(); transactionsMsgHandler.close(); fetchBlockService.close(); - effectiveCheckService.close(); resilienceService.close(); + effectiveCheckService.close(); p2pService.close(); relayService.close(); logger.info("Net service closed successfully"); diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 98f891739a8..46183ade894 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -98,10 +98,7 @@ public void resilienceNode() { peerList = peerList.subList(0, resilienceConfig.getDisconnectNumber()); } if (!peerList.isEmpty()) { - peerList.forEach(p -> { - logger.info("feature {}: {}", p.getInetSocketAddress(), p.getMaliciousFeature()); - p.disconnect(ReasonCode.MALICIOUS_NODE); - }); + peerList.forEach(this::disconnectMaliciousPeer); findCount = peerList.size(); } } @@ -126,13 +123,17 @@ private boolean findAndDisconnect() { Long::compareTo)); if (p.isPresent()) { - logger.info("feature {}: {}", p.get().getInetSocketAddress(), p.get().getMaliciousFeature()); - p.get().disconnect(ReasonCode.MALICIOUS_NODE); + disconnectMaliciousPeer(p.get()); return true; } return false; } + private void disconnectMaliciousPeer(PeerConnection p) { + logger.info("feature 
{}: {}", p.getInetSocketAddress(), p.getMaliciousFeature()); + p.disconnect(ReasonCode.MALICIOUS_NODE); + } + public void close() { ExecutorServiceManager.shutdownAndAwaitTermination(executor, esName); } From 75f405d2ff7bca93fbebedfba6426329f2b3e7df Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Fri, 28 Jun 2024 10:06:29 +0800 Subject: [PATCH 10/29] simplfy PeerConnection --- .../tron/core/net/peer/PeerConnection.java | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index ebbd91163ec..93315697cce 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -334,25 +334,20 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { @Getter public class MaliciousFeature { - private boolean hasBadSyncBlockChain = false; - private long badSyncBlockChainTime; - private boolean hasBadChainInventory = false; - private long badChainInventoryTime; - private boolean isZombie = false; - private long zombieBeginTime; - - //it can only be set from false to true + private long badSyncBlockChainTime = -1; + private long badChainInventoryTime = -1; + private long zombieBeginTime = -1; + + //it can only be set from -1 to positive public void updateBadFeature1() { - if (!hasBadSyncBlockChain) { - hasBadSyncBlockChain = true; + if (badSyncBlockChainTime < 0) { badSyncBlockChainTime = System.currentTimeMillis(); } } - //it can only be set from false to true + //it can only be set from -1 to positive public void updateBadFeature2() { - if (!hasBadChainInventory) { - hasBadChainInventory = true; + if (badChainInventoryTime < 0) { badChainInventoryTime = System.currentTimeMillis(); } } @@ -360,41 +355,40 @@ public void updateBadFeature2() { // if peer is in adv status and no block 
received and sent between us for too long, // it is a zombie public void updateBadFeature3() { - isZombie = false; long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - tempTime > peerNotActiveTime * 1000) { - this.isZombie = true; - this.zombieBeginTime = tempTime; + zombieBeginTime = tempTime; } } public long getOldestTime() { List times = new ArrayList<>(); - if (hasBadSyncBlockChain) { + if (badSyncBlockChainTime > 0) { times.add(badSyncBlockChainTime); } - if (hasBadChainInventory) { + if (badChainInventoryTime > 0) { times.add(badChainInventoryTime); } - if (isZombie) { + if (zombieBeginTime > 0) { times.add(zombieBeginTime); } + if (times.isEmpty()) { + return -1; + } return Collections.min(times); } @Override public String toString() { - return String.format("(1:[%s][%d] 2:[%s][%d] 3:[%s][%d])", - hasBadSyncBlockChain, badSyncBlockChainTime, - hasBadChainInventory, badChainInventoryTime, - isZombie, zombieBeginTime); + return String.format("(1:[%d] 2:[%d] 3:[%d])", + badSyncBlockChainTime, badChainInventoryTime, zombieBeginTime); } } public boolean isMalicious() { - return maliciousFeature.hasBadSyncBlockChain || maliciousFeature.hasBadChainInventory - || maliciousFeature.isZombie; + return maliciousFeature.badSyncBlockChainTime > 0 || maliciousFeature.badChainInventoryTime > 0 + || maliciousFeature.zombieBeginTime > 0; } @Override From 22c54ed5ed613a789c1880ccad2b0ac76aa3033f Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Fri, 28 Jun 2024 11:12:04 +0800 Subject: [PATCH 11/29] don't disconnect with active peer if connection is full in case 3 --- .../tron/core/net/service/effective/ResilienceService.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 
46183ade894..4c874db935f 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -63,7 +63,7 @@ public void resilienceNode() { //1. if local node belongs to a lan network, disconnect with first malicious node if necessary if (peerSize == activePeerSize && peerSize >= CommonParameter.getInstance().minConnections) { - findCount = findAndDisconnect() ? 1 : 0; + findCount = findAndDisconnect(false) ? 1 : 0; } //2. if local node's latestSaveBlockTime has not changed more than several minutes, @@ -106,7 +106,7 @@ public void resilienceNode() { //3. if peers' number is equal or larger than maxConnections, disconnect with oldest peer if (findCount == 0 && peerSize >= CommonParameter.getInstance().maxConnections) { - findCount = findAndDisconnect() ? 1 : 0; + findCount = findAndDisconnect(true) ? 1 : 0; } if (findCount > 0) { @@ -114,11 +114,12 @@ public void resilienceNode() { } } - private boolean findAndDisconnect() { + private boolean findAndDisconnect(boolean excludeActive) { Optional p = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) .filter(PeerConnection::isMalicious) + .filter(peer -> !excludeActive || !peer.getChannel().isActive()) .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getOldestTime(), Long::compareTo)); From 720753715246edd7104c6b23be1af12d13b4a9b1 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Fri, 28 Jun 2024 14:16:29 +0800 Subject: [PATCH 12/29] add some log --- .../tron/core/net/service/effective/ResilienceService.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 4c874db935f..c18b8fa4449 100644 --- 
a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -72,9 +72,10 @@ public void resilienceNode() { int advPeerCount = (int) tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isNeedSyncFromPeer() && !peer.isNeedSyncFromUs()) .count(); + long diff = System.currentTimeMillis() - chainBaseManager.getLatestSaveBlockTime(); if (advPeerCount >= 1 && peerSize >= CommonParameter.getInstance().minConnections - && System.currentTimeMillis() - chainBaseManager.getLatestSaveBlockTime() - >= resilienceConfig.getBlockNotChangeTime() * 1000L) { + && diff >= resilienceConfig.getBlockNotChangeTime() * 1000L) { + logger.warn("Node has been isolated for {} ms, try to disconnect some peers", diff); //prefer to disconnect with active peer. if all are same, choose the oldest List peerList = tronNetDelegate.getActivePeer().stream() From 2642b9f8b346d0cee862027ebff882bdb5a04164 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Fri, 28 Jun 2024 14:23:46 +0800 Subject: [PATCH 13/29] add condition when disconnect --- .../net/service/effective/ResilienceService.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index c18b8fa4449..3ab24f4f796 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -63,7 +63,7 @@ public void resilienceNode() { //1. if local node belongs to a lan network, disconnect with first malicious node if necessary if (peerSize == activePeerSize && peerSize >= CommonParameter.getInstance().minConnections) { - findCount = findAndDisconnect(false) ? 
1 : 0; + findCount = findAndDisconnect(false, "case1") ? 1 : 0; } //2. if local node's latestSaveBlockTime has not changed more than several minutes, @@ -99,7 +99,7 @@ public void resilienceNode() { peerList = peerList.subList(0, resilienceConfig.getDisconnectNumber()); } if (!peerList.isEmpty()) { - peerList.forEach(this::disconnectMaliciousPeer); + peerList.forEach(peer -> disconnectMaliciousPeer(peer, "case2")); findCount = peerList.size(); } } @@ -107,7 +107,7 @@ public void resilienceNode() { //3. if peers' number is equal or larger than maxConnections, disconnect with oldest peer if (findCount == 0 && peerSize >= CommonParameter.getInstance().maxConnections) { - findCount = findAndDisconnect(true) ? 1 : 0; + findCount = findAndDisconnect(true, "case3") ? 1 : 0; } if (findCount > 0) { @@ -115,7 +115,7 @@ public void resilienceNode() { } } - private boolean findAndDisconnect(boolean excludeActive) { + private boolean findAndDisconnect(boolean excludeActive, String condition) { Optional p = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) @@ -125,14 +125,15 @@ private boolean findAndDisconnect(boolean excludeActive) { Long::compareTo)); if (p.isPresent()) { - disconnectMaliciousPeer(p.get()); + disconnectMaliciousPeer(p.get(), condition); return true; } return false; } - private void disconnectMaliciousPeer(PeerConnection p) { - logger.info("feature {}: {}", p.getInetSocketAddress(), p.getMaliciousFeature()); + private void disconnectMaliciousPeer(PeerConnection p, String condition) { + logger.info("feature {}: {}, condition:{}", p.getInetSocketAddress(), p.getMaliciousFeature(), + condition); p.disconnect(ReasonCode.MALICIOUS_NODE); } From edf6934079066b84d8ad60549a1f844169f4c43c Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Fri, 28 Jun 2024 15:13:30 +0800 Subject: [PATCH 14/29] add feature if enables --- .../tron/core/net/peer/PeerConnection.java | 19 
+++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 93315697cce..c5d7e8fb048 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -78,6 +78,8 @@ public class PeerConnection { @Getter private volatile boolean isBadPeer; + private final boolean isResilienceEnabled = Args.getInstance().getResilienceConfig().isEnabled(); + @Getter private final MaliciousFeature maliciousFeature = new MaliciousFeature(); @@ -85,8 +87,7 @@ public class PeerConnection { private long advStartTime = System.currentTimeMillis(); @Getter - private final long peerNotActiveTime = Args.getInstance().getResilienceConfig() - .getPeerNotActiveTime(); + private long peerNotActiveTime = Args.getInstance().getResilienceConfig().getPeerNotActiveTime(); @Getter @Setter @@ -233,7 +234,7 @@ public String log() { long now = System.currentTimeMillis(); BlockId syncBlockId = syncBlockToFetch.peek(); Pair, Long> requested = syncChainRequested; - return String.format( + String data = String.format( "Peer %s\n" + "connect time: %ds [%sms]\n" + "last know block num: %s\n" @@ -244,8 +245,7 @@ public String log() { + "syncBlockRequestedSize:%d\n" + "remainNum:%d\n" + "syncChainRequested:%d\n" - + "blockInProcess:%d\n" - + "feature:%s\n", + + "blockInProcess:%d\n", channel.getInetSocketAddress(), (now - channel.getStartTime()) / Constant.ONE_THOUSAND, channel.getAvgLatency(), @@ -257,9 +257,12 @@ public String log() { syncBlockRequested.size(), remainNum, requested == null ? 
0 : (now - requested.getValue()) - / Constant.ONE_THOUSAND, - syncBlockInProcess.size(), - maliciousFeature); + / Constant.ONE_THOUSAND, + syncBlockInProcess.size()); + if (isResilienceEnabled) { + data += String.format("feature:%s\n", maliciousFeature); + } + return data; } public boolean isSyncFinish() { From 81b4289b381cc3b5cde0b037cc5368824bdfb3be Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Mon, 1 Jul 2024 18:10:05 +0800 Subject: [PATCH 15/29] add testcase ResilienceServiceTest --- .../tron/core/net/peer/PeerConnection.java | 4 +- .../src/main/resources/config-test-net.conf | 16 ++ framework/src/main/resources/config.conf | 16 ++ .../net/services/ResilienceServiceTest.java | 216 ++++++++++++++++++ framework/src/test/resources/config-test.conf | 20 ++ 5 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index c5d7e8fb048..12ac9273177 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -87,7 +87,7 @@ public class PeerConnection { private long advStartTime = System.currentTimeMillis(); @Getter - private long peerNotActiveTime = Args.getInstance().getResilienceConfig().getPeerNotActiveTime(); + private final long PeerNotActiveTime = Args.getInstance().getResilienceConfig().getPeerNotActiveTime(); @Getter @Setter @@ -360,7 +360,7 @@ public void updateBadFeature2() { public void updateBadFeature3() { long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); if (!needSyncFromPeer && !needSyncFromUs - && System.currentTimeMillis() - tempTime > peerNotActiveTime * 1000) { + && System.currentTimeMillis() - tempTime > PeerNotActiveTime * 1000) { zombieBeginTime = tempTime; } } diff --git 
a/framework/src/main/resources/config-test-net.conf b/framework/src/main/resources/config-test-net.conf index ff292a3951c..7125d3b63de 100644 --- a/framework/src/main/resources/config-test-net.conf +++ b/framework/src/main/resources/config-test-net.conf @@ -70,6 +70,22 @@ node.backup { ] } +node.resilience { + enable = false + + # the period (seconds) of check zombie nodes + checkInterval = 60 + + # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. + peerNotActiveTime = 600 + + # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated + blockNotChangeTime = 300 + + # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 + disconnectNumber = 2 +} + node { # trust node for solidity node # trustNode = "ip:port" diff --git a/framework/src/main/resources/config.conf b/framework/src/main/resources/config.conf index 78427c30f87..df6cfb57b6f 100644 --- a/framework/src/main/resources/config.conf +++ b/framework/src/main/resources/config.conf @@ -148,6 +148,22 @@ crypto { # prometheus metrics end +node.resilience { + enable = false + + # the period (seconds) of check zombie nodes + checkInterval = 60 + + # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. + peerNotActiveTime = 600 + + # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated + blockNotChangeTime = 300 + + # If the node is isolated, the number of zombie nodes will be disconnected. 
In other case, it's always 1 + disconnectNumber = 2 +} + node { # trust node for solidity node # trustNode = "ip:port" diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java new file mode 100644 index 00000000000..7aff52037b2 --- /dev/null +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -0,0 +1,216 @@ +package org.tron.core.net.services; + + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.spy; + +import io.netty.channel.ChannelHandlerContext; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; +import org.tron.common.application.TronApplicationContext; +import org.tron.common.parameter.ResilienceConfig; +import org.tron.common.utils.ReflectUtils; +import org.tron.core.ChainBaseManager; +import org.tron.core.Constant; +import org.tron.core.config.DefaultConfig; +import org.tron.core.config.args.Args; +import org.tron.core.net.peer.PeerConnection; +import org.tron.core.net.peer.PeerManager; +import org.tron.core.net.service.effective.ResilienceService; +import org.tron.p2p.connection.Channel; + +public class ResilienceServiceTest { + + protected TronApplicationContext context; + private ResilienceService service; + private ChainBaseManager chainBaseManager; + + private ResilienceConfig resilienceConfig; + @Rule + public final TemporaryFolder temporaryFolder = new TemporaryFolder(); + + + @Before + public void init() throws IOException { + Args.setParam(new String[] {"--output-directory", + temporaryFolder.newFolder().toString(), "--debug"}, Constant.TEST_CONF); + context = 
new TronApplicationContext(DefaultConfig.class); + service = context.getBean(ResilienceService.class); + chainBaseManager = context.getBean(ChainBaseManager.class); + resilienceConfig = Args.getInstance().getResilienceConfig(); + } + + @After + public void destroy() { + Args.clearParam(); + context.destroy(); + } + + @Test + public void testCondition1() { + + int minConnection = 8; + Assert.assertEquals(minConnection, Args.getInstance().getMinConnections()); + clearPeers(); + Assert.assertEquals(0, PeerManager.getPeers().size()); + + List channelList = new ArrayList<>(); + for (int i = 0; i < minConnection; i++) { + InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." + i, 10001); + Channel c1 = spy(Channel.class); + ReflectUtils.setFieldValue(c1, "inetSocketAddress", inetSocketAddress); + ReflectUtils.setFieldValue(c1, "inetAddress", inetSocketAddress.getAddress()); + ReflectUtils.setFieldValue(c1, "isActive", true); + ReflectUtils.setFieldValue(c1, "ctx", spy(ChannelHandlerContext.class)); + Mockito.doNothing().when(c1).send((byte[]) any()); + + channelList.add(c1); + } + + for (int i = 0; i < minConnection - 1; i++) { + PeerManager.add(context, channelList.get(i)); + } + Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); + + PeerManager.getPeers().get(0).getMaliciousFeature().updateBadFeature1(); + //not enough peers + service.resilienceNode(); + Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); + + PeerManager.add(context, channelList.get(minConnection - 1)); + Assert.assertEquals(minConnection, PeerManager.getPeers().size()); + + //enough peers + service.resilienceNode(); + Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); + } + + @Test + public void testCondition2() { + Assert.assertEquals(8, Args.getInstance().getMinConnections()); + Assert.assertEquals(30, Args.getInstance().getMaxConnections()); + clearPeers(); + Assert.assertEquals(0, PeerManager.getPeers().size()); + 
Assert.assertTrue(resilienceConfig.isEnabled()); + + // peer 1 ~ 10 are not active, needSyncFromPeer = true, needSyncFromUs = true + // peer 11 ~ 20 are active, needSyncFromPeer = false, needSyncFromUs = false + int totalNumber = 20; + List channelList = new ArrayList<>(); + long t1 = System.currentTimeMillis() - resilienceConfig.getPeerNotActiveTime() * 1000L - 1000L; + for (int i = 0; i < totalNumber; i++) { + InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." + i, 10001); + Channel c1 = spy(Channel.class); + ReflectUtils.setFieldValue(c1, "inetSocketAddress", inetSocketAddress); + ReflectUtils.setFieldValue(c1, "inetAddress", inetSocketAddress.getAddress()); + ReflectUtils.setFieldValue(c1, "isActive", i >= 10); + ReflectUtils.setFieldValue(c1, "ctx", spy(ChannelHandlerContext.class)); + ReflectUtils.setFieldValue(c1, "lastActiveTime", t1); + Mockito.doNothing().when(c1).send((byte[]) any()); + + channelList.add(c1); + } + + for (Channel channel : channelList) { + PeerManager.add(context, channel); + } + for (PeerConnection p : PeerManager.getPeers()) { //peer's order is not same as channelList + if (p.getChannel().isActive()) { + p.setNeedSyncFromPeer(false); + p.setNeedSyncFromUs(false); + ReflectUtils.setFieldValue(p, "advStartTime", t1); + p.getMaliciousFeature().updateBadFeature3(); + } + } + Assert.assertEquals(totalNumber, PeerManager.getPeers().size()); + + service.resilienceNode(); + Assert.assertEquals(totalNumber, PeerManager.getPeers().size()); + Assert.assertEquals(10, + PeerManager.getPeers().stream().filter(p -> p.getChannel().isActive()).count()); + + int blockNotChangeTime = resilienceConfig.getBlockNotChangeTime(); + int disconnectNumber = resilienceConfig.getDisconnectNumber(); + Assert.assertEquals(2, disconnectNumber); + // trigger that node is isolated + chainBaseManager.setLatestSaveBlockTime( + System.currentTimeMillis() - blockNotChangeTime * 1000L - 1000L); + + //disconnect some peer if node is isolated, prefer to 
disconnect active nodes + service.resilienceNode(); + Assert.assertEquals(totalNumber - disconnectNumber, PeerManager.getPeers().size()); + Assert.assertEquals(10 - disconnectNumber, + PeerManager.getPeers().stream().filter(p -> p.getChannel().isActive()).count()); + Assert.assertEquals(10, + PeerManager.getPeers().stream().filter(p -> !p.getChannel().isActive()).count()); + } + + @Test + public void testCondition3() { + int maxConnection = 30; + Assert.assertEquals(maxConnection, Args.getInstance().getMaxConnections()); + clearPeers(); + Assert.assertEquals(0, PeerManager.getPeers().size()); + + int activeNumber = 10; + List<Channel> channelList = new ArrayList<>(); + for (int i = 0; i < maxConnection; i++) { + InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." + i, 10001); + Channel c1 = spy(Channel.class); + ReflectUtils.setFieldValue(c1, "inetSocketAddress", inetSocketAddress); + ReflectUtils.setFieldValue(c1, "inetAddress", inetSocketAddress.getAddress()); + ReflectUtils.setFieldValue(c1, "isActive", i < activeNumber); + ReflectUtils.setFieldValue(c1, "ctx", spy(ChannelHandlerContext.class)); + Mockito.doNothing().when(c1).send((byte[]) any()); + + channelList.add(c1); + } + + for (Channel channel : channelList) { + PeerManager.add(context, channel); + } + Assert.assertEquals(maxConnection, PeerManager.getPeers().size()); + + //set one active peer to malicious (any feature is ok) + PeerManager.getPeers().get(0).getMaliciousFeature().updateBadFeature1(); + service.resilienceNode(); + Assert.assertEquals(maxConnection, PeerManager.getPeers().size()); + + //set two passive peers to malicious (any feature is ok) + PeerManager.getPeers().get(activeNumber).getMaliciousFeature().updateBadFeature2(); + try { + Thread.sleep(100); + } catch (InterruptedException e) { + Assert.fail(); + } + PeerManager.getPeers().get(activeNumber + 5).getMaliciousFeature().updateBadFeature2(); + //verify that disconnect one malicious peer + service.resilienceNode(); + 
Assert.assertEquals(maxConnection - 1, PeerManager.getPeers().size()); + + //verify that disconnect oldest malicious peer + Set<String> ipSet = new HashSet<>(); + for (PeerConnection p : PeerManager.getPeers()) { + ipSet.add(p.getChannel().getInetAddress().getHostName()); + } + Assert.assertFalse(ipSet.contains("201.0.0." + activeNumber)); + Assert.assertTrue(ipSet.contains("201.0.0." + (activeNumber + 5))); + } + + private void clearPeers() { + for (PeerConnection p : PeerManager.getPeers()) { + PeerManager.remove(p.getChannel()); + } + } +} diff --git a/framework/src/test/resources/config-test.conf b/framework/src/test/resources/config-test.conf index db24bb2a8a0..13c4dfd20f8 100644 --- a/framework/src/test/resources/config-test.conf +++ b/framework/src/test/resources/config-test.conf @@ -83,6 +83,22 @@ node.discovery = { external.ip = "46.168.1.1" } +node.resilience { + enable = true + + # the period (seconds) of check zombie nodes + checkInterval = 60 + + # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. + peerNotActiveTime = 600 + + # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated + blockNotChangeTime = 300 + + # If the node is isolated, the number of zombie nodes will be disconnected. 
In other case, it's always 1 + disconnectNumber = 2 +} + node { trustNode = "127.0.0.1:50051" @@ -91,6 +107,10 @@ node { connection.timeout = 2 + maxConnections = 30 + minConnections = 8 + minActiveConnections = 3 + active = [ # Sample entries: # { url = "enode://@hostname.com:30303" } From f2a1732a31c61ffa9963df68cfc08ce5c5861ec3 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Tue, 2 Jul 2024 16:21:15 +0800 Subject: [PATCH 16/29] rename name of config value --- .../org/tron/common/parameter/ResilienceConfig.java | 4 ++-- common/src/main/java/org/tron/core/Constant.java | 4 ++-- .../main/java/org/tron/core/config/args/Args.java | 12 ++++++------ .../java/org/tron/core/net/peer/PeerConnection.java | 6 +++--- .../net/service/effective/ResilienceService.java | 2 +- framework/src/main/resources/config-test-net.conf | 8 ++++---- framework/src/main/resources/config.conf | 8 ++++---- .../core/net/services/ResilienceServiceTest.java | 7 ++++--- framework/src/test/resources/config-test.conf | 8 ++++---- 9 files changed, 30 insertions(+), 29 deletions(-) diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java index 12b68413ba6..478b16b22a5 100644 --- a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -16,11 +16,11 @@ public class ResilienceConfig { @Getter @Setter - private int peerNotActiveTime = 600; + private int peerNotActiveThreshold = 600; @Getter @Setter - private int blockNotChangeTime = 300; + private int blockNotChangeThreshold = 300; @Getter @Setter diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index 7e969b27505..3967207a5f8 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ b/common/src/main/java/org/tron/core/Constant.java @@ -126,8 +126,8 @@ public class Constant { 
public static final String NODE_RESILIENCE_ENABLE = "node.resilience.enable"; public static final String NODE_RESILIENCE_CHECK_INTERVAL = "node.resilience.checkInterval"; - public static final String NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME = "node.resilience.peerNotActiveTime"; - public static final String NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME = "node.resilience.blockNotChangeTime"; + public static final String NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD = "node.resilience.peerNotActiveThreshold"; + public static final String NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD = "node.resilience.blockNotChangeThreshold"; public static final String NODE_RESILIENCE_DISCONNECT_NUMBER = "node.resilience.disconnectNumber"; public static final String NODE_RPC_PORT = "node.rpc.port"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index 900d75e91a1..778bc9cb778 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -1498,13 +1498,13 @@ private static ResilienceConfig loadResilienceConfig(final com.typesafe.config.C if (config.hasPath(Constant.NODE_RESILIENCE_CHECK_INTERVAL)) { resilienceConfig.setCheckInterval(config.getInt(Constant.NODE_RESILIENCE_CHECK_INTERVAL)); } - if (config.hasPath(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME)) { - resilienceConfig.setPeerNotActiveTime( - config.getInt(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_TIME)); + if (config.hasPath(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD)) { + resilienceConfig.setPeerNotActiveThreshold( + config.getInt(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD)); } - if (config.hasPath(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME)) { - resilienceConfig.setBlockNotChangeTime( - config.getInt(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_TIME)); + if (config.hasPath(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD)) { + 
resilienceConfig.setBlockNotChangeThreshold( + config.getInt(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD)); } if (config.hasPath(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)) { resilienceConfig.setDisconnectNumber( diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 081870f07cd..b61cafded27 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -87,8 +87,8 @@ public class PeerConnection { private long advStartTime = System.currentTimeMillis(); @Getter - private final long PeerNotActiveTime = Args.getInstance().getResilienceConfig() - .getPeerNotActiveTime(); + private final long PeerNotActiveThreshold = Args.getInstance().getResilienceConfig() + .getPeerNotActiveThreshold(); @Getter @Setter @@ -361,7 +361,7 @@ public void updateBadFeature2() { public void updateBadFeature3() { long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); if (!needSyncFromPeer && !needSyncFromUs - && System.currentTimeMillis() - tempTime > PeerNotActiveTime * 1000) { + && System.currentTimeMillis() - tempTime > PeerNotActiveThreshold * 1000) { zombieBeginTime = tempTime; } } diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 3ab24f4f796..3c25d04ff26 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -74,7 +74,7 @@ public void resilienceNode() { .count(); long diff = System.currentTimeMillis() - chainBaseManager.getLatestSaveBlockTime(); if (advPeerCount >= 1 && peerSize >= CommonParameter.getInstance().minConnections - && diff >= resilienceConfig.getBlockNotChangeTime() * 1000L) { + && diff >= 
resilienceConfig.getBlockNotChangeThreshold() * 1000L) { logger.warn("Node has been isolated for {} ms, try to disconnect some peers", diff); //prefer to disconnect with active peer. if all are same, choose the oldest diff --git a/framework/src/main/resources/config-test-net.conf b/framework/src/main/resources/config-test-net.conf index 7125d3b63de..c9c38592a85 100644 --- a/framework/src/main/resources/config-test-net.conf +++ b/framework/src/main/resources/config-test-net.conf @@ -76,11 +76,11 @@ node.resilience { # the period (seconds) of check zombie nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. - peerNotActiveTime = 600 + # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. + peerNotActiveThreshold = 600 - # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated - blockNotChangeTime = 300 + # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated + blockNotChangeThreshold = 300 # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 disconnectNumber = 2 diff --git a/framework/src/main/resources/config.conf b/framework/src/main/resources/config.conf index df6cfb57b6f..6c47901016b 100644 --- a/framework/src/main/resources/config.conf +++ b/framework/src/main/resources/config.conf @@ -154,11 +154,11 @@ node.resilience { # the period (seconds) of check zombie nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. - peerNotActiveTime = 600 + # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. 
+ peerNotActiveThreshold = 600 - # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated - blockNotChangeTime = 300 + # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated + blockNotChangeThreshold = 300 # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 disconnectNumber = 2 diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index b03a6bd8d69..85d62e218a6 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -107,7 +107,8 @@ public void testCondition2() { // peer 11 ~ 20 are active, needSyncFromPeer = false, needSyncFromUs = false int totalNumber = 20; List<Channel> channelList = new ArrayList<>(); - long t1 = System.currentTimeMillis() - resilienceConfig.getPeerNotActiveTime() * 1000L - 1000L; + long t1 = + System.currentTimeMillis() - resilienceConfig.getPeerNotActiveThreshold() * 1000L - 1000L; for (int i = 0; i < totalNumber; i++) { InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." 
+ i, 10001); Channel c1 = spy(Channel.class); @@ -139,12 +140,12 @@ public void testCondition2() { Assert.assertEquals(10, PeerManager.getPeers().stream().filter(p -> p.getChannel().isActive()).count()); - int blockNotChangeTime = resilienceConfig.getBlockNotChangeTime(); + int blockNotChangeThreshold = resilienceConfig.getBlockNotChangeThreshold(); int disconnectNumber = resilienceConfig.getDisconnectNumber(); Assert.assertEquals(2, disconnectNumber); // trigger that node is isolated chainBaseManager.setLatestSaveBlockTime( - System.currentTimeMillis() - blockNotChangeTime * 1000L - 1000L); + System.currentTimeMillis() - blockNotChangeThreshold * 1000L - 1000L); //disconnect some peer if node is isolated, prefer to disconnect active nodes service.resilienceNode(); diff --git a/framework/src/test/resources/config-test.conf b/framework/src/test/resources/config-test.conf index 13c4dfd20f8..e6764fe7a4f 100644 --- a/framework/src/test/resources/config-test.conf +++ b/framework/src/test/resources/config-test.conf @@ -89,11 +89,11 @@ node.resilience { # the period (seconds) of check zombie nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveTime (seconds), it is a zombie. - peerNotActiveTime = 600 + # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. + peerNotActiveThreshold = 600 - # if the latest block number stay unchanged for more than blockNotChangeTime (seconds), it is isolated - blockNotChangeTime = 300 + # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated + blockNotChangeThreshold = 300 # If the node is isolated, the number of zombie nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 From 406c5c20bbffd149a90108f53b224722afaaeec7 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 3 Jul 2024 12:25:40 +0800 Subject: [PATCH 17/29] schedule to test after 10 seconds --- .../common/parameter/ResilienceConfig.java | 4 + .../src/main/java/org/tron/core/Constant.java | 1 + .../java/org/tron/core/config/args/Args.java | 3 + .../messagehandler/InventoryMsgHandler.java | 5 ++ .../tron/core/net/peer/PeerConnection.java | 90 ++++++++++++++----- .../tron/core/net/service/adv/AdvService.java | 16 +++- .../service/effective/ResilienceService.java | 6 +- .../src/main/resources/config-test-net.conf | 4 + framework/src/main/resources/config.conf | 4 + .../net/services/ResilienceServiceTest.java | 2 +- framework/src/test/resources/config-test.conf | 4 + 11 files changed, 112 insertions(+), 27 deletions(-) diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java index 478b16b22a5..c3996a5fd41 100644 --- a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -22,6 +22,10 @@ public class ResilienceConfig { @Setter private int blockNotChangeThreshold = 300; + @Getter + @Setter + private boolean testStopInv = false; + @Getter @Setter private int disconnectNumber = 1; diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index 3967207a5f8..e7f3a11fe14 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ b/common/src/main/java/org/tron/core/Constant.java @@ -128,6 +128,7 @@ public class Constant { public static final String NODE_RESILIENCE_CHECK_INTERVAL = "node.resilience.checkInterval"; public static final String NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD = "node.resilience.peerNotActiveThreshold"; public static final String 
NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD = "node.resilience.blockNotChangeThreshold"; + public static final String NODE_RESILIENCE_TEST_STOP_INV = "node.resilience.testStopInv"; public static final String NODE_RESILIENCE_DISCONNECT_NUMBER = "node.resilience.disconnectNumber"; public static final String NODE_RPC_PORT = "node.rpc.port"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index 778bc9cb778..420104461c0 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -1506,6 +1506,9 @@ private static ResilienceConfig loadResilienceConfig(final com.typesafe.config.C resilienceConfig.setBlockNotChangeThreshold( config.getInt(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD)); } + if (config.hasPath(Constant.NODE_RESILIENCE_TEST_STOP_INV)) { + resilienceConfig.setTestStopInv(config.getBoolean(Constant.NODE_RESILIENCE_TEST_STOP_INV)); + } if (config.hasPath(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)) { resilienceConfig.setDisconnectNumber( config.getInt(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)); diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index a8ad8d0ec73..f856324743c 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -39,6 +39,11 @@ public void processMessage(PeerConnection peer, TronMessage msg) { Item item = new Item(id, type); peer.getAdvInvReceive().put(item, System.currentTimeMillis()); advService.addInv(item); + + if (type.equals(InventoryType.BLOCK) && peer.getAdvInvSpread().getIfPresent(item) == null) { + peer.getMaliciousFeature().setLastRecBlockInvTime(System.currentTimeMillis()); + 
peer.getMaliciousFeature().resetStopBlockInvTime(); //stop test + } } } diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index b61cafded27..13e5a6dafa1 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -6,6 +6,7 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Deque; import java.util.HashSet; @@ -23,6 +24,7 @@ import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Component; import org.tron.common.overlay.message.Message; +import org.tron.common.parameter.ResilienceConfig; import org.tron.common.prometheus.MetricKeys; import org.tron.common.prometheus.Metrics; import org.tron.common.utils.Pair; @@ -78,18 +80,11 @@ public class PeerConnection { @Getter private volatile boolean isBadPeer; - private final boolean isResilienceEnabled = Args.getInstance().getResilienceConfig().isEnabled(); + private final ResilienceConfig resilienceConfig = Args.getInstance().getResilienceConfig(); @Getter private final MaliciousFeature maliciousFeature = new MaliciousFeature(); - @Getter - private long advStartTime = System.currentTimeMillis(); - - @Getter - private final long PeerNotActiveThreshold = Args.getInstance().getResilienceConfig() - .getPeerNotActiveThreshold(); - @Getter @Setter private ByteString address; @@ -183,7 +178,7 @@ public void setBlockBothHave(BlockId blockId) { public void updateAdvStartTime() { if (!needSyncFromPeer && !needSyncFromUs) { - this.advStartTime = System.currentTimeMillis(); + this.getMaliciousFeature().advStartTime = System.currentTimeMillis(); } } @@ -260,7 +255,7 @@ public String log() { requested == null ? 
0 : (now - requested.getValue()) / Constant.ONE_THOUSAND, syncBlockInProcess.size()); - if (isResilienceEnabled) { + if (resilienceConfig.isEnabled()) { data += String.format("feature:%s\n", maliciousFeature); } return data; @@ -334,13 +329,42 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { return true; } + // if peer is in adv status and no block received and sent between us for too long, and we + // don't receive block inventory also, then the peer is not active. Use only half of threshold. + public boolean isNotActiveTooLong() { + return System.currentTimeMillis() - getLatestTime() + > resilienceConfig.getPeerNotActiveThreshold() / 2 * 1000L; + } + + private long getLatestTime() { + List<Long> times = Arrays.asList( + channel.getLastActiveTime(), //last block time + getMaliciousFeature().getAdvStartTime(), //adv begin time + getMaliciousFeature().getLastRecBlockInvTime()); //inventory + return Collections.max(times); + } @Getter public class MaliciousFeature { - private long badSyncBlockChainTime = -1; - private long badChainInventoryTime = -1; - private long zombieBeginTime = -1; + @Setter + private long advStartTime = System.currentTimeMillis(); + @Setter + private long stopBlockInvTime = -1; + @Setter + private long lastRecBlockInvTime = System.currentTimeMillis(); + //if testStopInv=true, we use feature 4, else use feature 3. We can only use one of them. 
+ private final boolean testStopInv; + + //four features + private long badSyncBlockChainTime = -1; //feature 1 + private long badChainInventoryTime = -1; //feature 2 + private long zombieBeginTime = -1; //feature 3 + private long zombieBeginTime2 = -1; //feature 4 + + public MaliciousFeature() { + testStopInv = Args.getInstance().getResilienceConfig().isTestStopInv(); + } //it can only be set from -1 to positive public void updateBadFeature1() { @@ -357,16 +381,29 @@ public void updateBadFeature2() { } // if peer is in adv status and no block received and sent between us for too long, - // it is a zombie + // it is a zombie. public void updateBadFeature3() { long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); - if (!needSyncFromPeer && !needSyncFromUs - && System.currentTimeMillis() - tempTime > PeerNotActiveThreshold * 1000) { + if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - tempTime + > resilienceConfig.getPeerNotActiveThreshold() * 1000L) { zombieBeginTime = tempTime; } } - public long getOldestTime() { + //if receive block inventory from peer 0 ~ 10 seconds later after inventory check, it's ok. + // else it's malicious. 
it can only be set from -1 to positive + public void updateBadFeature4() { + if (zombieBeginTime2 < 0 + && maliciousFeature.lastRecBlockInvTime < maliciousFeature.stopBlockInvTime) { + zombieBeginTime2 = getLatestTime(); + } + } + + public void resetStopBlockInvTime() { + stopBlockInvTime = -1; + } + + public long getEarliestTime() { List<Long> times = new ArrayList<>(); if (badSyncBlockChainTime > 0) { times.add(badSyncBlockChainTime); @@ -374,8 +411,15 @@ public long getOldestTime() { if (badChainInventoryTime > 0) { times.add(badChainInventoryTime); } - if (zombieBeginTime > 0) { - times.add(zombieBeginTime); + + if (!testStopInv) { + if (zombieBeginTime > 0) { + times.add(zombieBeginTime); + } + } else { + if (zombieBeginTime2 > 0) { + times.add(zombieBeginTime2); + } } if (times.isEmpty()) { return -1; @@ -385,14 +429,16 @@ public long getOldestTime() { @Override public String toString() { - return String.format("(1:[%d] 2:[%d] 3:[%d])", - badSyncBlockChainTime, badChainInventoryTime, zombieBeginTime); + return String.format("(1:[%d] 2:[%d] 3:[%d] 4:[%d])", + badSyncBlockChainTime, badChainInventoryTime, zombieBeginTime, zombieBeginTime2); } } public boolean isMalicious() { + boolean isMalicious = resilienceConfig.isTestStopInv() ? 
(maliciousFeature.zombieBeginTime2 > 0) + : (maliciousFeature.zombieBeginTime > 0); return maliciousFeature.badSyncBlockChainTime > 0 || maliciousFeature.badChainInventoryTime > 0 - || maliciousFeature.zombieBeginTime > 0; + || isMalicious; } @Override diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index 505b53358c8..a7f6791f8bc 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -46,6 +46,7 @@ public class AdvService { private final int MAX_BLOCK_CACHE_SIZE = 10; private final int MAX_SPREAD_SIZE = 1_000; private final long TIMEOUT = MSG_CACHE_DURATION_IN_BLOCKS * BLOCK_PRODUCED_INTERVAL; + private final boolean isResilienceEnabled = Args.getInstance().getResilienceConfig().isEnabled(); @Autowired private TronNetDelegate tronNetDelegate; @@ -73,12 +74,16 @@ public class AdvService { private final String spreadName = "adv-spread"; private final String fetchName = "adv-fetch"; + private final String invCheckName = "inv-check"; private final ScheduledExecutorService spreadExecutor = ExecutorServiceManager .newSingleThreadScheduledExecutor(spreadName); private final ScheduledExecutorService fetchExecutor = ExecutorServiceManager .newSingleThreadScheduledExecutor(fetchName); + private final ScheduledExecutorService invCheckExecutor = ExecutorServiceManager + .newSingleThreadScheduledExecutor(invCheckName); + @Getter private MessageCount trxCount = new MessageCount(); @@ -368,7 +373,16 @@ public void sendInv() { } if (key.equals(InventoryType.BLOCK)) { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); - peer.sendMessage(new InventoryMessage(value, key)); + if (isResilienceEnabled && peer.isNotActiveTooLong() + && peer.getMaliciousFeature().getStopBlockInvTime() == -1) { + //if peer is not active for too long, test if peer will 
broadcast block inventory to me + //after I stop broadcasting block inventory to it + peer.getMaliciousFeature().setStopBlockInvTime(System.currentTimeMillis()); + invCheckExecutor.schedule(() -> peer.getMaliciousFeature().updateBadFeature4(), + 10, TimeUnit.SECONDS); + } else { + peer.sendMessage(new InventoryMessage(value, key)); + } } else { peer.sendMessage(new InventoryMessage(value, key)); } diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 3c25d04ff26..7989e3a6a68 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -88,8 +88,8 @@ public void resilienceNode() { } else if (!o1.getChannel().isActive() && o2.getChannel().isActive()) { return 1; } else { - return Long.compare(o1.getMaliciousFeature().getOldestTime(), - o2.getMaliciousFeature().getOldestTime()); + return Long.compare(o1.getMaliciousFeature().getEarliestTime(), + o2.getMaliciousFeature().getEarliestTime()); } }) .collect(Collectors.toList()); @@ -121,7 +121,7 @@ private boolean findAndDisconnect(boolean excludeActive, String condition) { .filter(peer -> !peer.getChannel().isTrustPeer()) .filter(PeerConnection::isMalicious) .filter(peer -> !excludeActive || !peer.getChannel().isActive()) - .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getOldestTime(), + .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getEarliestTime(), Long::compareTo)); if (p.isPresent()) { diff --git a/framework/src/main/resources/config-test-net.conf b/framework/src/main/resources/config-test-net.conf index c9c38592a85..0fcc22d8549 100644 --- a/framework/src/main/resources/config-test-net.conf +++ b/framework/src/main/resources/config-test-net.conf @@ -82,6 +82,10 @@ node.resilience { # if the latest block number stay unchanged for 
more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 + # if peer is not active for too long, whether to test if peer will broadcast block inventory to me + # after I stop broadcasting block inventory to it. + testStopInv = false + # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 disconnectNumber = 2 } diff --git a/framework/src/main/resources/config.conf b/framework/src/main/resources/config.conf index 6c47901016b..4aca6ab1f15 100644 --- a/framework/src/main/resources/config.conf +++ b/framework/src/main/resources/config.conf @@ -160,6 +160,10 @@ node.resilience { # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 + # if peer is not active for too long, whether to test if peer will broadcast block inventory to me + # after I stop broadcasting block inventory to it. + testStopInv = false + # If the node is isolated, the number of zombie nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 } diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index 85d62e218a6..f285a874560 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -129,7 +129,7 @@ public void testCondition2() { if (p.getChannel().isActive()) { p.setNeedSyncFromPeer(false); p.setNeedSyncFromUs(false); - ReflectUtils.setFieldValue(p, "advStartTime", t1); + p.getMaliciousFeature().setAdvStartTime(t1); p.getMaliciousFeature().updateBadFeature3(); } } diff --git a/framework/src/test/resources/config-test.conf b/framework/src/test/resources/config-test.conf index e6764fe7a4f..c8997d242a7 100644 --- a/framework/src/test/resources/config-test.conf +++ b/framework/src/test/resources/config-test.conf @@ -95,6 +95,10 @@ node.resilience { # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 + # if peer is not active for too long, whether to test if peer will broadcast block inventory to me + # after I stop broadcasting block inventory to it. + testStopInv = false + # If the node is isolated, the number of zombie nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 } From c5c3add84dcddeca6c8dcc04770344e5dc83af86 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 3 Jul 2024 12:42:58 +0800 Subject: [PATCH 18/29] only one of feature3 and feature4 is used --- .../java/org/tron/core/net/service/adv/AdvService.java | 5 +++-- .../core/net/service/effective/ResilienceService.java | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index a7f6791f8bc..af17e3e0d47 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -46,7 +46,8 @@ public class AdvService { private final int MAX_BLOCK_CACHE_SIZE = 10; private final int MAX_SPREAD_SIZE = 1_000; private final long TIMEOUT = MSG_CACHE_DURATION_IN_BLOCKS * BLOCK_PRODUCED_INTERVAL; - private final boolean isResilienceEnabled = Args.getInstance().getResilienceConfig().isEnabled(); + private final boolean testStopInv = Args.getInstance().getResilienceConfig().isEnabled() + && Args.getInstance().getResilienceConfig().isTestStopInv(); @Autowired private TronNetDelegate tronNetDelegate; @@ -373,7 +374,7 @@ public void sendInv() { } if (key.equals(InventoryType.BLOCK)) { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); - if (isResilienceEnabled && peer.isNotActiveTooLong() + if (testStopInv && peer.isNotActiveTooLong() && peer.getMaliciousFeature().getStopBlockInvTime() == -1) { //if peer is not active for too long, test if peer will broadcast block inventory to me //after I stop broadcasting block inventory to it diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 7989e3a6a68..316507f325d 100644 
--- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -51,9 +51,11 @@ public void init() { public void resilienceNode() { - //update peers' bad feature 3 at first - tronNetDelegate.getActivePeer() - .forEach(peer -> peer.getMaliciousFeature().updateBadFeature3()); + if (!resilienceConfig.isTestStopInv()) { + //update peers' bad feature 3 at first + tronNetDelegate.getActivePeer() + .forEach(peer -> peer.getMaliciousFeature().updateBadFeature3()); + } int peerSize = tronNetDelegate.getActivePeer().size(); int activePeerSize = (int) tronNetDelegate.getActivePeer().stream() From 90af8ef9fe75b90a5969b06b65b583372f91feba Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 3 Jul 2024 13:05:59 +0800 Subject: [PATCH 19/29] use same peerNotActiveThreshold for block and inventory --- .../src/main/java/org/tron/core/net/peer/PeerConnection.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 13e5a6dafa1..9f105b70ceb 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -330,10 +330,10 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { } // if peer is in adv status and no block received and sent between us for too long, and we - // don't receive block inventory also, then the peer is not active. Use only half of threshold. + // don't receive block inventory also, then the peer is not active. 
public boolean isNotActiveTooLong() { return System.currentTimeMillis() - getLatestTime() - > resilienceConfig.getPeerNotActiveThreshold() / 2 * 1000L; + > resilienceConfig.getPeerNotActiveThreshold() * 1000L; } private long getLatestTime() { From 828077b9b8170afd0f39e7b00a3f48e1d78c7e7e Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 3 Jul 2024 16:08:20 +0800 Subject: [PATCH 20/29] add testcase testCondition1StopInv --- .../tron/core/net/peer/PeerConnection.java | 14 +++---- .../service/effective/ResilienceService.java | 4 +- .../net/services/ResilienceServiceTest.java | 41 +++++++++++++++++++ 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 9f105b70ceb..e085e80538a 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -427,6 +427,13 @@ public long getEarliestTime() { return Collections.min(times); } + public boolean isMalicious() { + boolean isMalicious = testStopInv ? (maliciousFeature.zombieBeginTime2 > 0) + : (maliciousFeature.zombieBeginTime > 0); + return maliciousFeature.badSyncBlockChainTime > 0 || maliciousFeature.badChainInventoryTime > 0 + || isMalicious; + } + @Override public String toString() { return String.format("(1:[%d] 2:[%d] 3:[%d] 4:[%d])", @@ -434,13 +441,6 @@ public String toString() { } } - public boolean isMalicious() { - boolean isMalicious = resilienceConfig.isTestStopInv() ? 
(maliciousFeature.zombieBeginTime2 > 0) - : (maliciousFeature.zombieBeginTime > 0); - return maliciousFeature.badSyncBlockChainTime > 0 || maliciousFeature.badChainInventoryTime > 0 - || isMalicious; - } - @Override public boolean equals(Object o) { if (!(o instanceof PeerConnection)) { diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 316507f325d..1e77c3f329f 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -83,7 +83,7 @@ public void resilienceNode() { List peerList = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) - .filter(PeerConnection::isMalicious) + .filter(peer -> peer.getMaliciousFeature().isMalicious()) .sorted((o1, o2) -> { if (o1.getChannel().isActive() && !o2.getChannel().isActive()) { return -1; @@ -121,7 +121,7 @@ private boolean findAndDisconnect(boolean excludeActive, String condition) { Optional p = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) - .filter(PeerConnection::isMalicious) + .filter(peer -> peer.getMaliciousFeature().isMalicious()) .filter(peer -> !excludeActive || !peer.getChannel().isActive()) .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getEarliestTime(), Long::compareTo)); diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index f285a874560..225f857b872 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -95,6 +95,47 @@ public void 
testCondition1() { Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); } + @Test + public void testCondition1StopInv() { + + int minConnection = 8; + Assert.assertEquals(minConnection, Args.getInstance().getMinConnections()); + Assert.assertFalse(resilienceConfig.isTestStopInv()); + clearPeers(); + Assert.assertEquals(0, PeerManager.getPeers().size()); + + // test stop inventory + resilienceConfig.setTestStopInv(true); + + long t1 = + System.currentTimeMillis() - resilienceConfig.getPeerNotActiveThreshold() * 1000L - 1000L; + for (int i = 0; i < minConnection; i++) { + InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." + i, 10001); + Channel c1 = spy(Channel.class); + ReflectUtils.setFieldValue(c1, "inetSocketAddress", inetSocketAddress); + ReflectUtils.setFieldValue(c1, "inetAddress", inetSocketAddress.getAddress()); + ReflectUtils.setFieldValue(c1, "isActive", true); + ReflectUtils.setFieldValue(c1, "ctx", spy(ChannelHandlerContext.class)); + ReflectUtils.setFieldValue(c1, "lastActiveTime", t1); + Mockito.doNothing().when(c1).send((byte[]) any()); + PeerManager.add(context, c1); + } + + service.resilienceNode(); + Assert.assertEquals(minConnection, PeerManager.getPeers().size()); + + PeerConnection p = PeerManager.getPeers().get(0); + p.getMaliciousFeature().setAdvStartTime(t1); + p.getMaliciousFeature().setLastRecBlockInvTime(t1); + p.getMaliciousFeature().setStopBlockInvTime(t1 + 1); + p.getMaliciousFeature().updateBadFeature4(); + service.resilienceNode(); + Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); + + //resume config + resilienceConfig.setTestStopInv(false); + } + @Test public void testCondition2() { Assert.assertEquals(8, Args.getInstance().getMinConnections()); From a57de7a4a182e42f8bd858b13ed262b8e810f97a Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 3 Jul 2024 19:36:29 +0800 Subject: [PATCH 21/29] fix checkstyle and sonar check --- 
.../main/java/org/tron/core/net/peer/PeerConnection.java | 3 ++- .../core/net/service/effective/ResilienceService.java | 2 +- .../org/tron/core/net/services/ResilienceServiceTest.java | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index e085e80538a..51edc3a2920 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -430,7 +430,8 @@ public long getEarliestTime() { public boolean isMalicious() { boolean isMalicious = testStopInv ? (maliciousFeature.zombieBeginTime2 > 0) : (maliciousFeature.zombieBeginTime > 0); - return maliciousFeature.badSyncBlockChainTime > 0 || maliciousFeature.badChainInventoryTime > 0 + return maliciousFeature.badSyncBlockChainTime > 0 + || maliciousFeature.badChainInventoryTime > 0 || isMalicious; } diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 1e77c3f329f..3ae34d98969 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -43,7 +43,7 @@ public void init() { } catch (Exception e) { logger.error("Resilience node failed", e); } - }, 5 * 60, resilienceConfig.getCheckInterval(), TimeUnit.SECONDS); + }, 300, resilienceConfig.getCheckInterval(), TimeUnit.SECONDS); } else { logger.info("ResilienceService is disabled"); } diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index 225f857b872..0a9e44847a8 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ 
b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -229,12 +229,16 @@ public void testCondition3() { Assert.assertEquals(maxConnection, PeerManager.getPeers().size()); //set two passive peers to malicious (any feature is ok) + String firstIp = PeerManager.getPeers().get(activeNumber).getChannel().getInetAddress() + .getHostName(); PeerManager.getPeers().get(activeNumber).getMaliciousFeature().updateBadFeature2(); try { Thread.sleep(100); } catch (InterruptedException e) { Assert.fail(); } + String secondIp = PeerManager.getPeers().get(activeNumber + 5).getChannel().getInetAddress() + .getHostName(); PeerManager.getPeers().get(activeNumber + 5).getMaliciousFeature().updateBadFeature2(); //verify that disconnect one malicious peer service.resilienceNode(); @@ -245,8 +249,8 @@ public void testCondition3() { for (PeerConnection p : PeerManager.getPeers()) { ipSet.add(p.getChannel().getInetAddress().getHostName()); } - Assert.assertFalse(ipSet.contains("201.0.0." + activeNumber)); - Assert.assertTrue(ipSet.contains("201.0.0." 
+ (activeNumber + 5))); + Assert.assertFalse(ipSet.contains(firstIp)); + Assert.assertTrue(ipSet.contains(secondIp)); } private void clearPeers() { From 7836f4c696d27984bd9a6766dacdb656706cbe92 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 4 Jul 2024 14:13:29 +0800 Subject: [PATCH 22/29] test pause send inventory --- .../tron/core/net/peer/PeerConnection.java | 2 ++ .../tron/core/net/service/adv/AdvService.java | 20 ++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 51edc3a2920..a1116c427f8 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -352,6 +352,8 @@ public class MaliciousFeature { @Setter private long stopBlockInvTime = -1; @Setter + private long stopBlockInvEndTime = -1; + @Setter private long lastRecBlockInvTime = System.currentTimeMillis(); //if testStopInv=true, we use feature 4, else use feature 3. We an only use one of them. 
private final boolean testStopInv; diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index af17e3e0d47..9baca704cb6 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -368,20 +368,34 @@ public int getSize(PeerConnection peer) { } public void sendInv() { + long now = System.currentTimeMillis(); + int pauseTime = 10; send.forEach((peer, ids) -> ids.forEach((key, value) -> { if (peer.isRelayPeer() && key.equals(InventoryType.TRX)) { return; } if (key.equals(InventoryType.BLOCK)) { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); + boolean canSendBlockInventory = true; if (testStopInv && peer.isNotActiveTooLong() && peer.getMaliciousFeature().getStopBlockInvTime() == -1) { //if peer is not active for too long, test if peer will broadcast block inventory to me //after I stop broadcasting block inventory to it - peer.getMaliciousFeature().setStopBlockInvTime(System.currentTimeMillis()); + logger.info("Test to stop broadcast block inv to {}", peer.getInetSocketAddress()); + peer.getMaliciousFeature().setStopBlockInvTime(now); + peer.getMaliciousFeature().setStopBlockInvEndTime(now + pauseTime * 1000L); invCheckExecutor.schedule(() -> peer.getMaliciousFeature().updateBadFeature4(), - 10, TimeUnit.SECONDS); - } else { + pauseTime, TimeUnit.SECONDS); + canSendBlockInventory = false; + } + if (peer.getMaliciousFeature().getStopBlockInvTime() <= now + && now <= peer.getMaliciousFeature().getStopBlockInvEndTime()) { + canSendBlockInventory = false; + } + if(peer.getMaliciousFeature().getZombieBeginTime2() > 0){ + canSendBlockInventory = false; + } + if (canSendBlockInventory) { peer.sendMessage(new InventoryMessage(value, key)); } } else { From 24bcaa6d82de18c1472e1309be0c8866158b5378 Mon Sep 17 00:00:00 2001 From: jiangyuanshu 
<317787106@qq.com> Date: Thu, 4 Jul 2024 14:56:00 +0800 Subject: [PATCH 23/29] set TEST_PAUSE_INV_SECONDS to constant --- .../net/messagehandler/InventoryMsgHandler.java | 2 +- .../org/tron/core/net/peer/PeerConnection.java | 10 +++++----- .../org/tron/core/net/service/adv/AdvService.java | 14 +++++++------- .../core/net/services/ResilienceServiceTest.java | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index f856324743c..2235e9053a4 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -42,7 +42,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) { if (type.equals(InventoryType.BLOCK) && peer.getAdvInvSpread().getIfPresent(item) == null) { peer.getMaliciousFeature().setLastRecBlockInvTime(System.currentTimeMillis()); - peer.getMaliciousFeature().resetStopBlockInvTime(); //stop test + //peer.getMaliciousFeature().resetStopBlockInvTime(); //stop test } } } diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index a1116c427f8..c33d4612192 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -350,7 +350,7 @@ public class MaliciousFeature { @Setter private long advStartTime = System.currentTimeMillis(); @Setter - private long stopBlockInvTime = -1; + private long stopBlockInvStartTime = -1; @Setter private long stopBlockInvEndTime = -1; @Setter @@ -396,14 +396,14 @@ public void updateBadFeature3() { // else it's malicious. 
it can only be set from -1 to positive public void updateBadFeature4() { if (zombieBeginTime2 < 0 - && maliciousFeature.lastRecBlockInvTime < maliciousFeature.stopBlockInvTime) { + && maliciousFeature.lastRecBlockInvTime < maliciousFeature.stopBlockInvStartTime) { zombieBeginTime2 = getLatestTime(); } } - public void resetStopBlockInvTime() { - stopBlockInvTime = -1; - } + //public void resetStopBlockInvTime() { + // stopBlockInvTime = -1; + //} public long getEarliestTime() { List times = new ArrayList<>(); diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index 9baca704cb6..62e4211c1f7 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -45,6 +45,7 @@ public class AdvService { private final int MAX_TRX_CACHE_SIZE = 50_000; private final int MAX_BLOCK_CACHE_SIZE = 10; private final int MAX_SPREAD_SIZE = 1_000; + private final int TEST_PAUSE_INV_SECONDS = 10; private final long TIMEOUT = MSG_CACHE_DURATION_IN_BLOCKS * BLOCK_PRODUCED_INTERVAL; private final boolean testStopInv = Args.getInstance().getResilienceConfig().isEnabled() && Args.getInstance().getResilienceConfig().isTestStopInv(); @@ -369,7 +370,6 @@ public int getSize(PeerConnection peer) { public void sendInv() { long now = System.currentTimeMillis(); - int pauseTime = 10; send.forEach((peer, ids) -> ids.forEach((key, value) -> { if (peer.isRelayPeer() && key.equals(InventoryType.TRX)) { return; @@ -378,21 +378,21 @@ public void sendInv() { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); boolean canSendBlockInventory = true; if (testStopInv && peer.isNotActiveTooLong() - && peer.getMaliciousFeature().getStopBlockInvTime() == -1) { + && peer.getMaliciousFeature().getStopBlockInvStartTime() == -1) { //if peer is not active for too long, test if peer will broadcast 
block inventory to me //after I stop broadcasting block inventory to it logger.info("Test to stop broadcast block inv to {}", peer.getInetSocketAddress()); - peer.getMaliciousFeature().setStopBlockInvTime(now); - peer.getMaliciousFeature().setStopBlockInvEndTime(now + pauseTime * 1000L); + peer.getMaliciousFeature().setStopBlockInvStartTime(now); + peer.getMaliciousFeature().setStopBlockInvEndTime(now + TEST_PAUSE_INV_SECONDS * 1000L); invCheckExecutor.schedule(() -> peer.getMaliciousFeature().updateBadFeature4(), - pauseTime, TimeUnit.SECONDS); + TEST_PAUSE_INV_SECONDS, TimeUnit.SECONDS); canSendBlockInventory = false; } - if (peer.getMaliciousFeature().getStopBlockInvTime() <= now + if (peer.getMaliciousFeature().getStopBlockInvStartTime() <= now && now <= peer.getMaliciousFeature().getStopBlockInvEndTime()) { canSendBlockInventory = false; } - if(peer.getMaliciousFeature().getZombieBeginTime2() > 0){ + if (peer.getMaliciousFeature().getZombieBeginTime2() > 0) { canSendBlockInventory = false; } if (canSendBlockInventory) { diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index 0a9e44847a8..061a3c9dd5c 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -127,7 +127,7 @@ public void testCondition1StopInv() { PeerConnection p = PeerManager.getPeers().get(0); p.getMaliciousFeature().setAdvStartTime(t1); p.getMaliciousFeature().setLastRecBlockInvTime(t1); - p.getMaliciousFeature().setStopBlockInvTime(t1 + 1); + p.getMaliciousFeature().setStopBlockInvStartTime(t1 + 1); p.getMaliciousFeature().updateBadFeature4(); service.resilienceNode(); Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); From 5bf9ea3b98038a0392169585b7e2975463ffddf2 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Thu, 
4 Jul 2024 20:50:27 +0800 Subject: [PATCH 24/29] delete config item node.peerNoBlockTime --- .../java/org/tron/common/parameter/CommonParameter.java | 3 --- common/src/main/java/org/tron/core/Constant.java | 1 - framework/src/main/java/org/tron/core/config/args/Args.java | 3 --- .../src/main/java/org/tron/core/net/TronNetService.java | 2 +- .../tron/core/net/messagehandler/InventoryMsgHandler.java | 1 - .../main/java/org/tron/core/net/peer/PeerConnection.java | 6 +----- 6 files changed, 2 insertions(+), 14 deletions(-) diff --git a/common/src/main/java/org/tron/common/parameter/CommonParameter.java b/common/src/main/java/org/tron/common/parameter/CommonParameter.java index bd2baab923d..846d1e40c12 100644 --- a/common/src/main/java/org/tron/common/parameter/CommonParameter.java +++ b/common/src/main/java/org/tron/common/parameter/CommonParameter.java @@ -336,9 +336,6 @@ public class CommonParameter { public boolean isOpenFullTcpDisconnect; @Getter @Setter - public int peerNoBlockTime; - @Getter - @Setter public boolean nodeDetectEnable; @Getter @Setter diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index e7f3a11fe14..1e8f1921c97 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ b/common/src/main/java/org/tron/core/Constant.java @@ -204,7 +204,6 @@ public class Constant { public static final String NODE_RECEIVE_TCP_MIN_DATA_LENGTH = "node.receiveTcpMinDataLength"; public static final String NODE_IS_OPEN_FULL_TCP_DISCONNECT = "node.isOpenFullTcpDisconnect"; - public static final String NODE_PEER_NO_BLOCK_TIME = "node.peerNoBlockTime"; public static final String NODE_DETECT_ENABLE = "node.nodeDetectEnable"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index 420104461c0..b31a6afb752 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ 
b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -173,7 +173,6 @@ public static void clearParam() { PARAMETER.estimateEnergyMaxRetry = 3; PARAMETER.receiveTcpMinDataLength = 2048; PARAMETER.isOpenFullTcpDisconnect = false; - PARAMETER.peerNoBlockTime = 300; PARAMETER.nodeDetectEnable = false; PARAMETER.supportConstant = false; PARAMETER.debug = false; @@ -845,8 +844,6 @@ public static void setParam(final String[] args, final String confFileName) { PARAMETER.isOpenFullTcpDisconnect = config.hasPath(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT) && config.getBoolean(Constant.NODE_IS_OPEN_FULL_TCP_DISCONNECT); - PARAMETER.peerNoBlockTime = config.hasPath(Constant.NODE_PEER_NO_BLOCK_TIME) - ? config.getInt(Constant.NODE_PEER_NO_BLOCK_TIME) : 300; PARAMETER.nodeDetectEnable = config.hasPath(Constant.NODE_DETECT_ENABLE) && config.getBoolean(Constant.NODE_DETECT_ENABLE); diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index ba11e7ea0a7..4f22b4ac26d 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -184,7 +184,7 @@ private P2pConfig updateConfig(P2pConfig config) { config.setPort(parameter.getNodeListenPort()); config.setNetworkId(parameter.getNodeP2pVersion()); config.setDisconnectionPolicyEnable(parameter.isOpenFullTcpDisconnect()); - config.setNotActiveInterval(parameter.peerNoBlockTime * 1000L); + config.setNotActiveInterval(parameter.getResilienceConfig().getPeerNotActiveThreshold()); config.setNodeDetectEnable(parameter.isNodeDetectEnable()); config.setDiscoverEnable(parameter.isNodeDiscoveryEnable()); if (StringUtils.isEmpty(config.getIp()) && hasIpv4Stack(NetUtil.getAllLocalAddress())) { diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index 
2235e9053a4..5d1e81f5358 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -42,7 +42,6 @@ public void processMessage(PeerConnection peer, TronMessage msg) { if (type.equals(InventoryType.BLOCK) && peer.getAdvInvSpread().getIfPresent(item) == null) { peer.getMaliciousFeature().setLastRecBlockInvTime(System.currentTimeMillis()); - //peer.getMaliciousFeature().resetStopBlockInvTime(); //stop test } } } diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index c33d4612192..88ea53a5227 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -354,7 +354,7 @@ public class MaliciousFeature { @Setter private long stopBlockInvEndTime = -1; @Setter - private long lastRecBlockInvTime = System.currentTimeMillis(); + private long lastRecBlockInvTime = -1; //if testStopInv=true, we use feature 4, else use feature 3. We an only use one of them. 
private final boolean testStopInv; @@ -401,10 +401,6 @@ public void updateBadFeature4() { } } - //public void resetStopBlockInvTime() { - // stopBlockInvTime = -1; - //} - public long getEarliestTime() { List times = new ArrayList<>(); if (badSyncBlockChainTime > 0) { From 15b3abcc36b29886712438a04c65f54846ca20e1 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Tue, 9 Jul 2024 10:53:22 +0800 Subject: [PATCH 25/29] init latestSaveBlockTime in init method --- .../src/main/java/org/tron/core/ChainBaseManager.java | 3 ++- .../java/org/tron/core/net/peer/PeerConnection.java | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/chainbase/src/main/java/org/tron/core/ChainBaseManager.java b/chainbase/src/main/java/org/tron/core/ChainBaseManager.java index 859c425e51e..d148021f6c4 100644 --- a/chainbase/src/main/java/org/tron/core/ChainBaseManager.java +++ b/chainbase/src/main/java/org/tron/core/ChainBaseManager.java @@ -246,7 +246,7 @@ public class ChainBaseManager { @Getter @Setter - private long latestSaveBlockTime = System.currentTimeMillis(); + private long latestSaveBlockTime; // for test only public List getWitnesses() { @@ -385,6 +385,7 @@ private void init() { this.lowestBlockNum = this.blockIndexStore.getLimitNumber(1, 1).stream() .map(BlockId::getNum).findFirst().orElse(0L); this.nodeType = getLowestBlockNum() > 1 ? 
NodeType.LITE : NodeType.FULL; + this.latestSaveBlockTime = System.currentTimeMillis(); } public void shutdown() { diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 88ea53a5227..2defd116e43 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -82,6 +82,9 @@ public class PeerConnection { private final ResilienceConfig resilienceConfig = Args.getInstance().getResilienceConfig(); + private final boolean testStopInv = Args.getInstance().getResilienceConfig().isEnabled() + && Args.getInstance().getResilienceConfig().isTestStopInv(); + @Getter private final MaliciousFeature maliciousFeature = new MaliciousFeature(); @@ -348,15 +351,13 @@ private long getLatestTime() { public class MaliciousFeature { @Setter - private long advStartTime = System.currentTimeMillis(); + private long advStartTime; @Setter private long stopBlockInvStartTime = -1; @Setter private long stopBlockInvEndTime = -1; @Setter private long lastRecBlockInvTime = -1; - //if testStopInv=true, we use feature 4, else use feature 3. We an only use one of them. - private final boolean testStopInv; //four features private long badSyncBlockChainTime = -1; //feature 1 @@ -365,7 +366,7 @@ public class MaliciousFeature { private long zombieBeginTime2 = -1; //feature 4 public MaliciousFeature() { - testStopInv = Args.getInstance().getResilienceConfig().isTestStopInv(); + advStartTime = System.currentTimeMillis(); } //it can only be set from -1 to positive @@ -426,6 +427,7 @@ public long getEarliestTime() { } public boolean isMalicious() { + //if testStopInv=true, we use feature 4, else use feature 3. We an only use one of them. boolean isMalicious = testStopInv ? 
(maliciousFeature.zombieBeginTime2 > 0) : (maliciousFeature.zombieBeginTime > 0); return maliciousFeature.badSyncBlockChainTime > 0 From 369cd9273a6ee2309440d120944f6f29f48f796f Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Tue, 9 Jul 2024 11:38:55 +0800 Subject: [PATCH 26/29] use setNeedSyncFromPeer,setNeedSyncFromUs method --- .../ChainInventoryMsgHandler.java | 1 - .../SyncBlockChainMsgHandler.java | 1 - .../tron/core/net/peer/PeerConnection.java | 19 ++++++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java index 96688366820..1e432f2131d 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java @@ -56,7 +56,6 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep } peer.setTronState(TronState.SYNC_COMPLETED); peer.setNeedSyncFromPeer(false); - peer.updateAdvStartTime(); return; } diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java index 211cf18ebfa..cbdd60b0a78 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java @@ -51,7 +51,6 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep return; } else if (blockIds.size() == 1) { peer.setNeedSyncFromUs(false); - peer.updateAdvStartTime(); } else { peer.setNeedSyncFromUs(true); remainNum = headID.getNum() - blockIds.peekLast().getNum(); diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java 
b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 2defd116e43..ba31a43e306 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -159,10 +159,8 @@ public class PeerConnection { @Setter @Getter private Set syncBlockInProcess = new HashSet<>(); - @Setter @Getter private volatile boolean needSyncFromPeer = true; - @Setter @Getter private volatile boolean needSyncFromUs = true; @@ -179,7 +177,15 @@ public void setBlockBothHave(BlockId blockId) { this.blockBothHaveUpdateTime = System.currentTimeMillis(); } - public void updateAdvStartTime() { + public void setNeedSyncFromPeer(boolean flag) { + needSyncFromPeer = flag; + if (!needSyncFromPeer && !needSyncFromUs) { + this.getMaliciousFeature().advStartTime = System.currentTimeMillis(); + } + } + + public void setNeedSyncFromUs(boolean flag) { + needSyncFromUs = flag; if (!needSyncFromPeer && !needSyncFromUs) { this.getMaliciousFeature().advStartTime = System.currentTimeMillis(); } @@ -205,16 +211,15 @@ public void onConnect() { long peerHeadBlockNum = helloMessageReceive.getHeadBlockId().getNum(); if (peerHeadBlockNum > headBlockNum) { - needSyncFromUs = false; + setNeedSyncFromUs(false); syncService.startSync(this); } else { - needSyncFromPeer = false; + setNeedSyncFromPeer(false); if (peerHeadBlockNum == headBlockNum) { - needSyncFromUs = false; + setNeedSyncFromUs(false); } setTronState(TronState.SYNC_COMPLETED); } - updateAdvStartTime(); } public void onDisconnect() { From 3662502903d28a67d17a7055b9d58b452e7bb74b Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Tue, 9 Jul 2024 22:08:13 +0800 Subject: [PATCH 27/29] update some default value of class Feature --- .../ChainInventoryMsgHandler.java | 2 +- .../messagehandler/InventoryMsgHandler.java | 2 +- .../SyncBlockChainMsgHandler.java | 2 +- .../tron/core/net/peer/PeerConnection.java | 83 +++++++++---------- 
.../tron/core/net/service/adv/AdvService.java | 14 ++-- .../service/effective/ResilienceService.java | 34 ++++---- .../net/services/ResilienceServiceTest.java | 28 +++---- 7 files changed, 85 insertions(+), 80 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java index 1e432f2131d..789315bfbe4 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/ChainInventoryMsgHandler.java @@ -52,7 +52,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep if (blockIdWeGet.size() == 1 && tronNetDelegate.containBlock(blockIdWeGet.peek())) { if (blockIdWeGet.peek().getNum() < peer.getHelloMessageReceive().getSolidBlockId().getNum()) { - peer.getMaliciousFeature().updateBadFeature1(); + peer.getFeature().updateBadChainInventoryTime(); } peer.setTronState(TronState.SYNC_COMPLETED); peer.setNeedSyncFromPeer(false); diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index 5d1e81f5358..ab6f340bc90 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -41,7 +41,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) { advService.addInv(item); if (type.equals(InventoryType.BLOCK) && peer.getAdvInvSpread().getIfPresent(item) == null) { - peer.getMaliciousFeature().setLastRecBlockInvTime(System.currentTimeMillis()); + peer.getFeature().setLastRecBlockInvTime(System.currentTimeMillis()); } } } diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java 
b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java index cbdd60b0a78..01d7c32b7c7 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/SyncBlockChainMsgHandler.java @@ -39,7 +39,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) throws P2pExcep List summaryChainIds = syncBlockChainMessage.getBlockIds(); if (peer.isNeedSyncFromUs() && summaryChainIds.size() == 1 && summaryChainIds.get(0).getNum() == peer.getHelloMessageSend().getHeadBlockId().getNum()) { - peer.getMaliciousFeature().updateBadFeature2(); + peer.getFeature().updateBadSyncBlockChainTime(); } BlockId headID = tronNetDelegate.getHeadBlockId(); diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index ba31a43e306..32c2d73ee67 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -86,7 +86,7 @@ public class PeerConnection { && Args.getInstance().getResilienceConfig().isTestStopInv(); @Getter - private final MaliciousFeature maliciousFeature = new MaliciousFeature(); + private final Feature feature = new Feature(); @Getter @Setter @@ -180,14 +180,14 @@ public void setBlockBothHave(BlockId blockId) { public void setNeedSyncFromPeer(boolean flag) { needSyncFromPeer = flag; if (!needSyncFromPeer && !needSyncFromUs) { - this.getMaliciousFeature().advStartTime = System.currentTimeMillis(); + this.getFeature().advStartTime = System.currentTimeMillis(); } } public void setNeedSyncFromUs(boolean flag) { needSyncFromUs = flag; if (!needSyncFromPeer && !needSyncFromUs) { - this.getMaliciousFeature().advStartTime = System.currentTimeMillis(); + this.getFeature().advStartTime = System.currentTimeMillis(); } } @@ -264,7 +264,7 @@ public String log() { / 
Constant.ONE_THOUSAND, syncBlockInProcess.size()); if (resilienceConfig.isEnabled()) { - data += String.format("feature:%s\n", maliciousFeature); + data += String.format("feature:%s\n", feature); } return data; } @@ -347,63 +347,64 @@ public boolean isNotActiveTooLong() { private long getLatestTime() { List times = Arrays.asList( channel.getLastActiveTime(), //last block time - getMaliciousFeature().getAdvStartTime(), //adv begin time - getMaliciousFeature().getLastRecBlockInvTime()); //inventory + getFeature().getAdvStartTime(), //adv begin time + getFeature().getLastRecBlockInvTime()); //last receive inventory time return Collections.max(times); } @Getter - public class MaliciousFeature { + public class Feature { @Setter private long advStartTime; @Setter - private long stopBlockInvStartTime = -1; + private long stopBlockInvStartTime; @Setter - private long stopBlockInvEndTime = -1; + private long stopBlockInvEndTime; @Setter - private long lastRecBlockInvTime = -1; + private long lastRecBlockInvTime; //four features - private long badSyncBlockChainTime = -1; //feature 1 - private long badChainInventoryTime = -1; //feature 2 - private long zombieBeginTime = -1; //feature 3 - private long zombieBeginTime2 = -1; //feature 4 + private long badSyncBlockChainTime; + private long badChainInventoryTime; + private long noInteractionTime; + private long noInvBackTime; - public MaliciousFeature() { + public Feature() { advStartTime = System.currentTimeMillis(); } - //it can only be set from -1 to positive - public void updateBadFeature1() { - if (badSyncBlockChainTime < 0) { + //If SyncBlockChain's blockId size is 1 and the block number is equal to block number of + // HelloMessage that i sent to peer, it's malicious. it can only be set from 0 to positive. 
+ public void updateBadSyncBlockChainTime() { + if (badSyncBlockChainTime == 0) { badSyncBlockChainTime = System.currentTimeMillis(); } } - //it can only be set from -1 to positive - public void updateBadFeature2() { - if (badChainInventoryTime < 0) { + // if ChainInventory's blockId size is 1 and its number is smaller than received HelloMessage's + // solid block number, it's malicious. it can only be set from 0 to positive + public void updateBadChainInventoryTime() { + if (badChainInventoryTime == 0) { badChainInventoryTime = System.currentTimeMillis(); } } - // if peer is in adv status and no block received and sent between us for too long, - // it is a zombie. - public void updateBadFeature3() { + // if peer is in adv status and no block received and sent between us for too long, it is + // malicious. + public void updateNoInteractionTime() { long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - tempTime > resilienceConfig.getPeerNotActiveThreshold() * 1000L) { - zombieBeginTime = tempTime; + noInteractionTime = tempTime; } } - //if receive block inventory from peer 0 ~ 10 seconds later after inventory check, it's ok. - // else it's malicious. it can only be set from -1 to positive - public void updateBadFeature4() { - if (zombieBeginTime2 < 0 - && maliciousFeature.lastRecBlockInvTime < maliciousFeature.stopBlockInvStartTime) { - zombieBeginTime2 = getLatestTime(); + // if i receive any block inventory from peer during 0 ~ 10 seconds later after inventory check, + // it's ok, else it's malicious. 
it can only be set from 0 to positive + public void updateNoInvBackTime() { + if (noInvBackTime == 0 && lastRecBlockInvTime < stopBlockInvStartTime) { + noInvBackTime = getLatestTime(); } } @@ -417,12 +418,12 @@ public long getEarliestTime() { } if (!testStopInv) { - if (zombieBeginTime > 0) { - times.add(zombieBeginTime); + if (noInteractionTime > 0) { + times.add(noInteractionTime); } } else { - if (zombieBeginTime2 > 0) { - times.add(zombieBeginTime2); + if (noInvBackTime > 0) { + times.add(noInvBackTime); } } if (times.isEmpty()) { @@ -432,18 +433,16 @@ public long getEarliestTime() { } public boolean isMalicious() { - //if testStopInv=true, we use feature 4, else use feature 3. We an only use one of them. - boolean isMalicious = testStopInv ? (maliciousFeature.zombieBeginTime2 > 0) - : (maliciousFeature.zombieBeginTime > 0); - return maliciousFeature.badSyncBlockChainTime > 0 - || maliciousFeature.badChainInventoryTime > 0 - || isMalicious; + //if testStopInv=true, we use noInvBackTime, else use noInteractionTime. + // We an only use one of them. + boolean isMalicious = testStopInv ? 
(noInvBackTime > 0) : (noInteractionTime > 0); + return badSyncBlockChainTime > 0 || badChainInventoryTime > 0 || isMalicious; } @Override public String toString() { return String.format("(1:[%d] 2:[%d] 3:[%d] 4:[%d])", - badSyncBlockChainTime, badChainInventoryTime, zombieBeginTime, zombieBeginTime2); + badSyncBlockChainTime, badChainInventoryTime, noInteractionTime, noInvBackTime); } } diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index 62e4211c1f7..306c9a04723 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -378,21 +378,21 @@ public void sendInv() { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); boolean canSendBlockInventory = true; if (testStopInv && peer.isNotActiveTooLong() - && peer.getMaliciousFeature().getStopBlockInvStartTime() == -1) { + && peer.getFeature().getStopBlockInvStartTime() == 0) { //if peer is not active for too long, test if peer will broadcast block inventory to me //after I stop broadcasting block inventory to it logger.info("Test to stop broadcast block inv to {}", peer.getInetSocketAddress()); - peer.getMaliciousFeature().setStopBlockInvStartTime(now); - peer.getMaliciousFeature().setStopBlockInvEndTime(now + TEST_PAUSE_INV_SECONDS * 1000L); - invCheckExecutor.schedule(() -> peer.getMaliciousFeature().updateBadFeature4(), + peer.getFeature().setStopBlockInvStartTime(now); + peer.getFeature().setStopBlockInvEndTime(now + TEST_PAUSE_INV_SECONDS * 1000L); + invCheckExecutor.schedule(() -> peer.getFeature().updateNoInvBackTime(), TEST_PAUSE_INV_SECONDS, TimeUnit.SECONDS); canSendBlockInventory = false; } - if (peer.getMaliciousFeature().getStopBlockInvStartTime() <= now - && now <= peer.getMaliciousFeature().getStopBlockInvEndTime()) { + if (peer.getFeature().getStopBlockInvStartTime() <= now 
+ && now <= peer.getFeature().getStopBlockInvEndTime()) { canSendBlockInventory = false; } - if (peer.getMaliciousFeature().getZombieBeginTime2() > 0) { + if (peer.getFeature().getNoInvBackTime() > 0) { canSendBlockInventory = false; } if (canSendBlockInventory) { diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 3ae34d98969..fc1e49f6ca3 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -54,7 +54,7 @@ public void resilienceNode() { if (!resilienceConfig.isTestStopInv()) { //update peers' bad feature 3 at first tronNetDelegate.getActivePeer() - .forEach(peer -> peer.getMaliciousFeature().updateBadFeature3()); + .forEach(peer -> peer.getFeature().updateNoInteractionTime()); } int peerSize = tronNetDelegate.getActivePeer().size(); @@ -65,7 +65,7 @@ public void resilienceNode() { //1. if local node belongs to a lan network, disconnect with first malicious node if necessary if (peerSize == activePeerSize && peerSize >= CommonParameter.getInstance().minConnections) { - findCount = findAndDisconnect(false, "case1") ? 1 : 0; + findCount = findAndDisconnect(false, DisconnectStrategy.LAN_NODE) ? 1 : 0; } //2. 
if local node's latestSaveBlockTime has not changed more than several minutes, @@ -83,15 +83,15 @@ public void resilienceNode() { List peerList = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) - .filter(peer -> peer.getMaliciousFeature().isMalicious()) + .filter(peer -> peer.getFeature().isMalicious()) .sorted((o1, o2) -> { if (o1.getChannel().isActive() && !o2.getChannel().isActive()) { return -1; } else if (!o1.getChannel().isActive() && o2.getChannel().isActive()) { return 1; } else { - return Long.compare(o1.getMaliciousFeature().getEarliestTime(), - o2.getMaliciousFeature().getEarliestTime()); + return Long.compare(o1.getFeature().getEarliestTime(), + o2.getFeature().getEarliestTime()); } }) .collect(Collectors.toList()); @@ -101,7 +101,7 @@ public void resilienceNode() { peerList = peerList.subList(0, resilienceConfig.getDisconnectNumber()); } if (!peerList.isEmpty()) { - peerList.forEach(peer -> disconnectMaliciousPeer(peer, "case2")); + peerList.forEach(peer -> disconnectMaliciousPeer(peer, DisconnectStrategy.ISOLATED)); findCount = peerList.size(); } } @@ -109,7 +109,7 @@ public void resilienceNode() { //3. if peers' number is equal or larger than maxConnections, disconnect with oldest peer if (findCount == 0 && peerSize >= CommonParameter.getInstance().maxConnections) { - findCount = findAndDisconnect(true, "case3") ? 1 : 0; + findCount = findAndDisconnect(true, DisconnectStrategy.FULL_CONNECTION) ? 
1 : 0; } if (findCount > 0) { @@ -117,28 +117,34 @@ public void resilienceNode() { } } - private boolean findAndDisconnect(boolean excludeActive, String condition) { + private boolean findAndDisconnect(boolean excludeActive, DisconnectStrategy strategy) { Optional p = tronNetDelegate.getActivePeer().stream() .filter(peer -> !peer.isDisconnect()) .filter(peer -> !peer.getChannel().isTrustPeer()) - .filter(peer -> peer.getMaliciousFeature().isMalicious()) + .filter(peer -> peer.getFeature().isMalicious()) .filter(peer -> !excludeActive || !peer.getChannel().isActive()) - .min(Comparator.comparing(peer -> peer.getMaliciousFeature().getEarliestTime(), + .min(Comparator.comparing(peer -> peer.getFeature().getEarliestTime(), Long::compareTo)); if (p.isPresent()) { - disconnectMaliciousPeer(p.get(), condition); + disconnectMaliciousPeer(p.get(), strategy); return true; } return false; } - private void disconnectMaliciousPeer(PeerConnection p, String condition) { - logger.info("feature {}: {}, condition:{}", p.getInetSocketAddress(), p.getMaliciousFeature(), - condition); + private void disconnectMaliciousPeer(PeerConnection p, DisconnectStrategy strategy) { + logger.info("feature {}: {}, strategy: {}", p.getInetSocketAddress(), p.getFeature(), + strategy); p.disconnect(ReasonCode.MALICIOUS_NODE); } + public enum DisconnectStrategy { + LAN_NODE, + ISOLATED, + FULL_CONNECTION; + } + public void close() { ExecutorServiceManager.shutdownAndAwaitTermination(executor, esName); } diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index 061a3c9dd5c..d992fe11544 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -57,7 +57,7 @@ public void destroy() { } @Test - public void testCondition1() { + public void testLanNode() { int minConnection 
= 8; Assert.assertEquals(minConnection, Args.getInstance().getMinConnections()); @@ -82,7 +82,7 @@ public void testCondition1() { } Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); - PeerManager.getPeers().get(0).getMaliciousFeature().updateBadFeature1(); + PeerManager.getPeers().get(0).getFeature().updateBadSyncBlockChainTime(); //not enough peers service.resilienceNode(); Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); @@ -96,7 +96,7 @@ public void testCondition1() { } @Test - public void testCondition1StopInv() { + public void testLanNodeStopInv() { int minConnection = 8; Assert.assertEquals(minConnection, Args.getInstance().getMinConnections()); @@ -125,10 +125,10 @@ public void testCondition1StopInv() { Assert.assertEquals(minConnection, PeerManager.getPeers().size()); PeerConnection p = PeerManager.getPeers().get(0); - p.getMaliciousFeature().setAdvStartTime(t1); - p.getMaliciousFeature().setLastRecBlockInvTime(t1); - p.getMaliciousFeature().setStopBlockInvStartTime(t1 + 1); - p.getMaliciousFeature().updateBadFeature4(); + p.getFeature().setAdvStartTime(t1); + p.getFeature().setLastRecBlockInvTime(t1); + p.getFeature().setStopBlockInvStartTime(t1 + 1); + p.getFeature().updateNoInvBackTime(); service.resilienceNode(); Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); @@ -137,7 +137,7 @@ public void testCondition1StopInv() { } @Test - public void testCondition2() { + public void testIsolated() { Assert.assertEquals(8, Args.getInstance().getMinConnections()); Assert.assertEquals(30, Args.getInstance().getMaxConnections()); clearPeers(); @@ -170,8 +170,8 @@ public void testCondition2() { if (p.getChannel().isActive()) { p.setNeedSyncFromPeer(false); p.setNeedSyncFromUs(false); - p.getMaliciousFeature().setAdvStartTime(t1); - p.getMaliciousFeature().updateBadFeature3(); + p.getFeature().setAdvStartTime(t1); + p.getFeature().updateNoInteractionTime(); } } Assert.assertEquals(totalNumber, 
PeerManager.getPeers().size()); @@ -198,7 +198,7 @@ public void testCondition2() { } @Test - public void testCondition3() { + public void testFullConnection() { int maxConnection = 30; Assert.assertEquals(maxConnection, Args.getInstance().getMaxConnections()); clearPeers(); @@ -224,14 +224,14 @@ public void testCondition3() { Assert.assertEquals(maxConnection, PeerManager.getPeers().size()); //set one active peer to malicious (any feature is ok) - PeerManager.getPeers().get(0).getMaliciousFeature().updateBadFeature1(); + PeerManager.getPeers().get(0).getFeature().updateBadSyncBlockChainTime(); service.resilienceNode(); Assert.assertEquals(maxConnection, PeerManager.getPeers().size()); //set two passive peers to malicious (any feature is ok) String firstIp = PeerManager.getPeers().get(activeNumber).getChannel().getInetAddress() .getHostName(); - PeerManager.getPeers().get(activeNumber).getMaliciousFeature().updateBadFeature2(); + PeerManager.getPeers().get(activeNumber).getFeature().updateBadChainInventoryTime(); try { Thread.sleep(100); } catch (InterruptedException e) { @@ -239,7 +239,7 @@ public void testCondition3() { } String secondIp = PeerManager.getPeers().get(activeNumber + 5).getChannel().getInetAddress() .getHostName(); - PeerManager.getPeers().get(activeNumber + 5).getMaliciousFeature().updateBadFeature2(); + PeerManager.getPeers().get(activeNumber + 5).getFeature().updateBadChainInventoryTime(); //verify that disconnect one malicious peer service.resilienceNode(); Assert.assertEquals(maxConnection - 1, PeerManager.getPeers().size()); From 3a85f98eac6fc6b1139e29ec9df164a3fd1a091c Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 10 Jul 2024 12:41:23 +0800 Subject: [PATCH 28/29] check if addInv success --- .../common/parameter/ResilienceConfig.java | 4 +- .../src/main/java/org/tron/core/Constant.java | 2 +- .../java/org/tron/core/config/args/Args.java | 7 +-- .../main/java/org/tron/core/db/Manager.java | 2 +- 
.../org/tron/core/net/TronNetService.java | 2 +- .../messagehandler/InventoryMsgHandler.java | 5 +- .../tron/core/net/peer/PeerConnection.java | 34 +++++++------ .../tron/core/net/service/adv/AdvService.java | 51 ++++++++++--------- .../service/effective/ResilienceService.java | 4 +- .../src/main/resources/config-test-net.conf | 10 ++-- framework/src/main/resources/config.conf | 10 ++-- .../net/services/ResilienceServiceTest.java | 12 ++--- framework/src/test/resources/config-test.conf | 10 ++-- 13 files changed, 81 insertions(+), 72 deletions(-) diff --git a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java index c3996a5fd41..e5646a4e8df 100644 --- a/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java +++ b/common/src/main/java/org/tron/common/parameter/ResilienceConfig.java @@ -16,7 +16,7 @@ public class ResilienceConfig { @Getter @Setter - private int peerNotActiveThreshold = 600; + private int peerInactiveThreshold = 600; @Getter @Setter @@ -24,7 +24,7 @@ public class ResilienceConfig { @Getter @Setter - private boolean testStopInv = false; + private boolean stopInvEnable = false; @Getter @Setter diff --git a/common/src/main/java/org/tron/core/Constant.java b/common/src/main/java/org/tron/core/Constant.java index 1e8f1921c97..7f301c54f28 100644 --- a/common/src/main/java/org/tron/core/Constant.java +++ b/common/src/main/java/org/tron/core/Constant.java @@ -128,7 +128,7 @@ public class Constant { public static final String NODE_RESILIENCE_CHECK_INTERVAL = "node.resilience.checkInterval"; public static final String NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD = "node.resilience.peerNotActiveThreshold"; public static final String NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD = "node.resilience.blockNotChangeThreshold"; - public static final String NODE_RESILIENCE_TEST_STOP_INV = "node.resilience.testStopInv"; + public static final String 
NODE_RESILIENCE_STOP_INV_ENABLE = "node.resilience.stopInvEnable"; public static final String NODE_RESILIENCE_DISCONNECT_NUMBER = "node.resilience.disconnectNumber"; public static final String NODE_RPC_PORT = "node.rpc.port"; diff --git a/framework/src/main/java/org/tron/core/config/args/Args.java b/framework/src/main/java/org/tron/core/config/args/Args.java index b31a6afb752..095a5f91f13 100644 --- a/framework/src/main/java/org/tron/core/config/args/Args.java +++ b/framework/src/main/java/org/tron/core/config/args/Args.java @@ -1496,15 +1496,16 @@ private static ResilienceConfig loadResilienceConfig(final com.typesafe.config.C resilienceConfig.setCheckInterval(config.getInt(Constant.NODE_RESILIENCE_CHECK_INTERVAL)); } if (config.hasPath(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD)) { - resilienceConfig.setPeerNotActiveThreshold( + resilienceConfig.setPeerInactiveThreshold( config.getInt(Constant.NODE_RESILIENCE_PEER_NOT_ACTIVE_THRESHOLD)); } if (config.hasPath(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD)) { resilienceConfig.setBlockNotChangeThreshold( config.getInt(Constant.NODE_RESILIENCE_BLOCK_NOT_CHANGE_THRESHOLD)); } - if (config.hasPath(Constant.NODE_RESILIENCE_TEST_STOP_INV)) { - resilienceConfig.setTestStopInv(config.getBoolean(Constant.NODE_RESILIENCE_TEST_STOP_INV)); + if (config.hasPath(Constant.NODE_RESILIENCE_STOP_INV_ENABLE)) { + resilienceConfig.setStopInvEnable( + config.getBoolean(Constant.NODE_RESILIENCE_STOP_INV_ENABLE)); } if (config.hasPath(Constant.NODE_RESILIENCE_DISCONNECT_NUMBER)) { resilienceConfig.setDisconnectNumber( diff --git a/framework/src/main/java/org/tron/core/db/Manager.java b/framework/src/main/java/org/tron/core/db/Manager.java index 8b25309c92d..7cfa73d0c39 100644 --- a/framework/src/main/java/org/tron/core/db/Manager.java +++ b/framework/src/main/java/org/tron/core/db/Manager.java @@ -1391,7 +1391,6 @@ public void updateDynamicProperties(BlockCapsule block) { 
(chainBaseManager.getDynamicPropertiesStore().getLatestBlockHeaderNumber() - chainBaseManager.getDynamicPropertiesStore().getLatestSolidifiedBlockNum() + 1)); - chainBaseManager.setLatestSaveBlockTime(System.currentTimeMillis()); Metrics.gaugeSet(MetricKeys.Gauge.HEADER_HEIGHT, block.getNum()); Metrics.gaugeSet(MetricKeys.Gauge.HEADER_TIME, block.getTimeStamp()); } @@ -1806,6 +1805,7 @@ private void processBlock(BlockCapsule block, List txs) updateDynamicProperties(block); chainBaseManager.getBalanceTraceStore().resetCurrentBlockTrace(); + chainBaseManager.setLatestSaveBlockTime(System.currentTimeMillis()); if (CommonParameter.getInstance().isJsonRpcFilterEnabled()) { Bloom blockBloom = chainBaseManager.getSectionBloomStore() diff --git a/framework/src/main/java/org/tron/core/net/TronNetService.java b/framework/src/main/java/org/tron/core/net/TronNetService.java index 4f22b4ac26d..c1c2a99e37b 100644 --- a/framework/src/main/java/org/tron/core/net/TronNetService.java +++ b/framework/src/main/java/org/tron/core/net/TronNetService.java @@ -184,7 +184,7 @@ private P2pConfig updateConfig(P2pConfig config) { config.setPort(parameter.getNodeListenPort()); config.setNetworkId(parameter.getNodeP2pVersion()); config.setDisconnectionPolicyEnable(parameter.isOpenFullTcpDisconnect()); - config.setNotActiveInterval(parameter.getResilienceConfig().getPeerNotActiveThreshold()); + config.setNotActiveInterval(parameter.getResilienceConfig().getPeerInactiveThreshold()); config.setNodeDetectEnable(parameter.isNodeDetectEnable()); config.setDiscoverEnable(parameter.isNodeDiscoveryEnable()); if (StringUtils.isEmpty(config.getIp()) && hasIpv4Stack(NetUtil.getAllLocalAddress())) { diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index ab6f340bc90..a00d3124835 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ 
b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -38,9 +38,10 @@ public void processMessage(PeerConnection peer, TronMessage msg) { for (Sha256Hash id : inventoryMessage.getHashList()) { Item item = new Item(id, type); peer.getAdvInvReceive().put(item, System.currentTimeMillis()); - advService.addInv(item); + boolean flag = advService.addInv(item); - if (type.equals(InventoryType.BLOCK) && peer.getAdvInvSpread().getIfPresent(item) == null) { + if (type.equals(InventoryType.BLOCK) && flag + && peer.getAdvInvSpread().getIfPresent(item) == null) { peer.getFeature().setLastRecBlockInvTime(System.currentTimeMillis()); } } diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 32c2d73ee67..50e0ae1bcbd 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -82,8 +82,8 @@ public class PeerConnection { private final ResilienceConfig resilienceConfig = Args.getInstance().getResilienceConfig(); - private final boolean testStopInv = Args.getInstance().getResilienceConfig().isEnabled() - && Args.getInstance().getResilienceConfig().isTestStopInv(); + private final boolean stopInvEnable = Args.getInstance().getResilienceConfig().isEnabled() + && Args.getInstance().getResilienceConfig().isStopInvEnable(); @Getter private final Feature feature = new Feature(); @@ -339,9 +339,9 @@ public synchronized boolean checkAndPutAdvInvRequest(Item key, Long value) { // if peer is in adv status and no block received and sent between us for too long, and we // don't receive block inventory also, then the peer is not active. 
- public boolean isNotActiveTooLong() { + public boolean isInactiveTooLong() { return System.currentTimeMillis() - getLatestTime() - > resilienceConfig.getPeerNotActiveThreshold() * 1000L; + > resilienceConfig.getPeerInactiveThreshold() * 1000L; } private long getLatestTime() { @@ -367,7 +367,7 @@ public class Feature { //four features private long badSyncBlockChainTime; private long badChainInventoryTime; - private long noInteractionTime; + private long inactiveTime; private long noInvBackTime; public Feature() { @@ -391,12 +391,14 @@ public void updateBadChainInventoryTime() { } // if peer is in adv status and no block received and sent between us for too long, it is - // malicious. - public void updateNoInteractionTime() { - long tempTime = Math.max(channel.getLastActiveTime(), advStartTime); - if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - tempTime - > resilienceConfig.getPeerNotActiveThreshold() * 1000L) { - noInteractionTime = tempTime; + // malicious. It can recover to normal. + public void updateInactiveTime() { + long maxTime = Math.max(channel.getLastActiveTime(), advStartTime); + if (!needSyncFromPeer && !needSyncFromUs && System.currentTimeMillis() - maxTime + > resilienceConfig.getPeerInactiveThreshold() * 1000L) { + inactiveTime = maxTime; + } else { + inactiveTime = 0; } } @@ -417,9 +419,9 @@ public long getEarliestTime() { times.add(badChainInventoryTime); } - if (!testStopInv) { - if (noInteractionTime > 0) { - times.add(noInteractionTime); + if (!stopInvEnable) { + if (inactiveTime > 0) { + times.add(inactiveTime); } } else { if (noInvBackTime > 0) { @@ -435,14 +437,14 @@ public long getEarliestTime() { public boolean isMalicious() { //if testStopInv=true, we use noInvBackTime, else use noInteractionTime. // We an only use one of them. - boolean isMalicious = testStopInv ? (noInvBackTime > 0) : (noInteractionTime > 0); + boolean isMalicious = stopInvEnable ? 
(noInvBackTime > 0) : (inactiveTime > 0); return badSyncBlockChainTime > 0 || badChainInventoryTime > 0 || isMalicious; } @Override public String toString() { return String.format("(1:[%d] 2:[%d] 3:[%d] 4:[%d])", - badSyncBlockChainTime, badChainInventoryTime, noInteractionTime, noInvBackTime); + badSyncBlockChainTime, badChainInventoryTime, inactiveTime, noInvBackTime); } } diff --git a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java index 306c9a04723..85441729efd 100644 --- a/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java +++ b/framework/src/main/java/org/tron/core/net/service/adv/AdvService.java @@ -47,8 +47,8 @@ public class AdvService { private final int MAX_SPREAD_SIZE = 1_000; private final int TEST_PAUSE_INV_SECONDS = 10; private final long TIMEOUT = MSG_CACHE_DURATION_IN_BLOCKS * BLOCK_PRODUCED_INTERVAL; - private final boolean testStopInv = Args.getInstance().getResilienceConfig().isEnabled() - && Args.getInstance().getResilienceConfig().isTestStopInv(); + private final boolean stopInvEnable = Args.getInstance().getResilienceConfig().isEnabled() + && Args.getInstance().getResilienceConfig().isStopInvEnable(); @Autowired private TronNetDelegate tronNetDelegate; @@ -326,6 +326,30 @@ private synchronized void consumerInvToSpread() { invSender.sendInv(); } + private boolean canSendBlockInventory(PeerConnection peer) { + long now = System.currentTimeMillis(); + boolean canSend = true; + //if peer is not active for too long, test if peer will broadcast block inventory to me + //after I stop broadcasting block inventory to it + if (stopInvEnable && peer.isInactiveTooLong() + && peer.getFeature().getStopBlockInvStartTime() == 0) { + logger.info("Test to stop broadcast block inventory to {}", peer.getInetSocketAddress()); + peer.getFeature().setStopBlockInvStartTime(now); + peer.getFeature().setStopBlockInvEndTime(now + TEST_PAUSE_INV_SECONDS * 
1000L); + invCheckExecutor.schedule(() -> peer.getFeature().updateNoInvBackTime(), + TEST_PAUSE_INV_SECONDS, TimeUnit.SECONDS); + canSend = false; + } + if (peer.getFeature().getStopBlockInvStartTime() <= now + && now <= peer.getFeature().getStopBlockInvEndTime()) { + canSend = false; + } + if (peer.getFeature().getNoInvBackTime() > 0) { + canSend = false; + } + return canSend; + } + class InvSender { private HashMap>> send @@ -369,33 +393,14 @@ public int getSize(PeerConnection peer) { } public void sendInv() { - long now = System.currentTimeMillis(); send.forEach((peer, ids) -> ids.forEach((key, value) -> { if (peer.isRelayPeer() && key.equals(InventoryType.TRX)) { return; } if (key.equals(InventoryType.BLOCK)) { value.sort(Comparator.comparingLong(value1 -> new BlockId(value1).getNum())); - boolean canSendBlockInventory = true; - if (testStopInv && peer.isNotActiveTooLong() - && peer.getFeature().getStopBlockInvStartTime() == 0) { - //if peer is not active for too long, test if peer will broadcast block inventory to me - //after I stop broadcasting block inventory to it - logger.info("Test to stop broadcast block inv to {}", peer.getInetSocketAddress()); - peer.getFeature().setStopBlockInvStartTime(now); - peer.getFeature().setStopBlockInvEndTime(now + TEST_PAUSE_INV_SECONDS * 1000L); - invCheckExecutor.schedule(() -> peer.getFeature().updateNoInvBackTime(), - TEST_PAUSE_INV_SECONDS, TimeUnit.SECONDS); - canSendBlockInventory = false; - } - if (peer.getFeature().getStopBlockInvStartTime() <= now - && now <= peer.getFeature().getStopBlockInvEndTime()) { - canSendBlockInventory = false; - } - if (peer.getFeature().getNoInvBackTime() > 0) { - canSendBlockInventory = false; - } - if (canSendBlockInventory) { + boolean canSend = canSendBlockInventory(peer); + if (canSend) { peer.sendMessage(new InventoryMessage(value, key)); } } else { diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java 
b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index fc1e49f6ca3..01368ca9a18 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -51,10 +51,10 @@ public void init() { public void resilienceNode() { - if (!resilienceConfig.isTestStopInv()) { + if (!resilienceConfig.isStopInvEnable()) { //update peers' bad feature 3 at first tronNetDelegate.getActivePeer() - .forEach(peer -> peer.getFeature().updateNoInteractionTime()); + .forEach(peer -> peer.getFeature().updateInactiveTime()); } int peerSize = tronNetDelegate.getActivePeer().size(); diff --git a/framework/src/main/resources/config-test-net.conf b/framework/src/main/resources/config-test-net.conf index 0fcc22d8549..8c632bcf3cc 100644 --- a/framework/src/main/resources/config-test-net.conf +++ b/framework/src/main/resources/config-test-net.conf @@ -73,20 +73,20 @@ node.backup { node.resilience { enable = false - # the period (seconds) of check zombie nodes + # the period (seconds) of check malicious nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. - peerNotActiveThreshold = 600 + # If one peer becomes inactive for more than peerInactiveThreshold (seconds), it is malicious. + peerInactiveThreshold = 600 # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 # if peer is not active for too long, whether to test if peer will broadcast block inventory to me # after I stop broadcasting block inventory to it. - testStopInv = false + stopInvEnable = false - # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 + # If the node is isolated, the number of malicious nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 } diff --git a/framework/src/main/resources/config.conf b/framework/src/main/resources/config.conf index 4aca6ab1f15..025f5a4447e 100644 --- a/framework/src/main/resources/config.conf +++ b/framework/src/main/resources/config.conf @@ -151,20 +151,20 @@ crypto { node.resilience { enable = false - # the period (seconds) of check zombie nodes + # the period (seconds) of check malicious nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. - peerNotActiveThreshold = 600 + # If one peer becomes inactive for more than peerInactiveThreshold (seconds), it is malicious. + peerInactiveThreshold = 600 # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 # if peer is not active for too long, whether to test if peer will broadcast block inventory to me # after I stop broadcasting block inventory to it. - testStopInv = false + stopInvEnable = false - # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 + # If the node is isolated, the number of malicious nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 } diff --git a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java index d992fe11544..28df4190c38 100644 --- a/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java +++ b/framework/src/test/java/org/tron/core/net/services/ResilienceServiceTest.java @@ -100,15 +100,15 @@ public void testLanNodeStopInv() { int minConnection = 8; Assert.assertEquals(minConnection, Args.getInstance().getMinConnections()); - Assert.assertFalse(resilienceConfig.isTestStopInv()); + Assert.assertFalse(resilienceConfig.isStopInvEnable()); clearPeers(); Assert.assertEquals(0, PeerManager.getPeers().size()); // test stop inventory - resilienceConfig.setTestStopInv(true); + resilienceConfig.setStopInvEnable(true); long t1 = - System.currentTimeMillis() - resilienceConfig.getPeerNotActiveThreshold() * 1000L - 1000L; + System.currentTimeMillis() - resilienceConfig.getPeerInactiveThreshold() * 1000L - 1000L; for (int i = 0; i < minConnection; i++) { InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." + i, 10001); Channel c1 = spy(Channel.class); @@ -133,7 +133,7 @@ public void testLanNodeStopInv() { Assert.assertEquals(minConnection - 1, PeerManager.getPeers().size()); //resume config - resilienceConfig.setTestStopInv(false); + resilienceConfig.setStopInvEnable(false); } @Test @@ -149,7 +149,7 @@ public void testIsolated() { int totalNumber = 20; List channelList = new ArrayList<>(); long t1 = - System.currentTimeMillis() - resilienceConfig.getPeerNotActiveThreshold() * 1000L - 1000L; + System.currentTimeMillis() - resilienceConfig.getPeerInactiveThreshold() * 1000L - 1000L; for (int i = 0; i < totalNumber; i++) { InetSocketAddress inetSocketAddress = new InetSocketAddress("201.0.0." 
+ i, 10001); Channel c1 = spy(Channel.class); @@ -171,7 +171,7 @@ public void testIsolated() { p.setNeedSyncFromPeer(false); p.setNeedSyncFromUs(false); p.getFeature().setAdvStartTime(t1); - p.getFeature().updateNoInteractionTime(); + p.getFeature().updateInactiveTime(); } } Assert.assertEquals(totalNumber, PeerManager.getPeers().size()); diff --git a/framework/src/test/resources/config-test.conf b/framework/src/test/resources/config-test.conf index c8997d242a7..a5261b35c00 100644 --- a/framework/src/test/resources/config-test.conf +++ b/framework/src/test/resources/config-test.conf @@ -86,20 +86,20 @@ node.discovery = { node.resilience { enable = true - # the period (seconds) of check zombie nodes + # the period (seconds) of check malicious nodes checkInterval = 60 - # If one peer becomes inactive for more than peerNotActiveThreshold (seconds), it is a zombie. - peerNotActiveThreshold = 600 + # If one peer becomes inactive for more than peerInactiveThreshold (seconds), it is malicious. + peerInactiveThreshold = 600 # if the latest block number stay unchanged for more than blockNotChangeThreshold (seconds), it is isolated blockNotChangeThreshold = 300 # if peer is not active for too long, whether to test if peer will broadcast block inventory to me # after I stop broadcasting block inventory to it. - testStopInv = false + stopInvEnable = false - # If the node is isolated, the number of zombie nodes will be disconnected. In other case, it's always 1 + # If the node is isolated, the number of malicious nodes will be disconnected. 
In other case, it's always 1 disconnectNumber = 2 } From eafda509aebb678002c4d877c5e55bf904162431 Mon Sep 17 00:00:00 2001 From: jiangyuanshu <317787106@qq.com> Date: Wed, 10 Jul 2024 16:34:23 +0800 Subject: [PATCH 29/29] use ReasonCode.BAD_PROTOCOL; noInvBackTime is recoverable --- .../core/net/messagehandler/InventoryMsgHandler.java | 1 + .../java/org/tron/core/net/peer/PeerConnection.java | 12 +++++++++++- .../net/service/effective/ResilienceService.java | 2 +- protocol/src/main/protos/core/Tron.proto | 1 - 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java index a00d3124835..0c7532b31a4 100644 --- a/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java +++ b/framework/src/main/java/org/tron/core/net/messagehandler/InventoryMsgHandler.java @@ -43,6 +43,7 @@ public void processMessage(PeerConnection peer, TronMessage msg) { if (type.equals(InventoryType.BLOCK) && flag && peer.getAdvInvSpread().getIfPresent(item) == null) { peer.getFeature().setLastRecBlockInvTime(System.currentTimeMillis()); + peer.getFeature().resetNoInvBackTime(); } } } diff --git a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java index 50e0ae1bcbd..a20e7fdb8cd 100644 --- a/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java +++ b/framework/src/main/java/org/tron/core/net/peer/PeerConnection.java @@ -300,7 +300,6 @@ private void processDisconnect(Protocol.ReasonCode reason) { case BAD_PROTOCOL: case BAD_BLOCK: case BAD_TX: - case MALICIOUS_NODE: channel.close(BAD_PEER_BAN_TIME); break; default: @@ -410,6 +409,17 @@ public void updateNoInvBackTime() { } } + // if i receive valid block inventory from malicious peer with noInvBackTime > 0 before it is + // disconnected, then i reset this peer to 
normal + public void resetNoInvBackTime() { + if (stopInvEnable && noInvBackTime > 0) { + logger.info("resetNoInvBackTime peer {}", channel.getInetSocketAddress()); + noInvBackTime = 0; + stopBlockInvStartTime = 0; + stopBlockInvEndTime = 0; + } + } + public long getEarliestTime() { List times = new ArrayList<>(); if (badSyncBlockChainTime > 0) { diff --git a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java index 01368ca9a18..5e3cd6045da 100644 --- a/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java +++ b/framework/src/main/java/org/tron/core/net/service/effective/ResilienceService.java @@ -136,7 +136,7 @@ private boolean findAndDisconnect(boolean excludeActive, DisconnectStrategy stra private void disconnectMaliciousPeer(PeerConnection p, DisconnectStrategy strategy) { logger.info("feature {}: {}, strategy: {}", p.getInetSocketAddress(), p.getFeature(), strategy); - p.disconnect(ReasonCode.MALICIOUS_NODE); + p.disconnect(ReasonCode.BAD_PROTOCOL); } public enum DisconnectStrategy { diff --git a/protocol/src/main/protos/core/Tron.proto b/protocol/src/main/protos/core/Tron.proto index 805a0ed2e8a..2ffefbf9f3e 100644 --- a/protocol/src/main/protos/core/Tron.proto +++ b/protocol/src/main/protos/core/Tron.proto @@ -604,7 +604,6 @@ enum ReasonCode { BELOW_THAN_ME = 0X24; NOT_WITNESS = 0x25; NO_SUCH_MESSAGE = 0x26; - MALICIOUS_NODE = 0X27; UNKNOWN = 0xFF; }