Skip to content

Commit 8ef1317

Browse files
committed
Merge branch '4.11'
2 parents 1708838 + 30175d6 commit 8ef1317

71 files changed

Lines changed: 3727 additions & 3518 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

agent/conf/agent.properties

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ workers=5
3030
#host= The IP address of management server
3131
host=localhost
3232

33+
# The time interval in seconds after which agent will check if connected host
34+
# is the preferred host (the first host in the comma-separated list is preferred
35+
# one) and will attempt to reconnect to the preferred host when it's connected
36+
# to one of the secondary/backup hosts. The timer task is scheduled after agent
37+
# connects to a management server. On connection, it receives admin configured
38+
# cluster-level 'indirect.agent.lb.check.interval' setting that will be used by
39+
# the agent as the preferred host check interval however the following setting
40+
# if defined overrides the received value. The value 0 and lb algorithm 'shuffle'
41+
# disables this background task.
42+
#host.lb.check.interval=0
43+
3344
#port = The port management server listening on, default is 8250
3445
port=8250
3546

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import java.io.PrintWriter;
2222
import java.io.StringWriter;
2323
import java.net.InetAddress;
24+
import java.net.InetSocketAddress;
25+
import java.net.Socket;
2426
import java.net.UnknownHostException;
2527
import java.nio.channels.ClosedChannelException;
2628
import java.nio.charset.Charset;
@@ -38,12 +40,15 @@
3840
import javax.naming.ConfigurationException;
3941

4042
import org.apache.cloudstack.agent.directdownload.SetupDirectDownloadCertificate;
43+
import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
44+
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
4145
import org.apache.cloudstack.ca.SetupCertificateAnswer;
4246
import org.apache.cloudstack.ca.SetupCertificateCommand;
4347
import org.apache.cloudstack.ca.SetupKeyStoreCommand;
4448
import org.apache.cloudstack.ca.SetupKeystoreAnswer;
4549
import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
4650
import org.apache.cloudstack.utils.security.KeyStoreUtils;
51+
import org.apache.commons.collections.CollectionUtils;
4752
import org.apache.commons.io.FileUtils;
4853
import org.apache.log4j.Logger;
4954
import org.slf4j.MDC;
@@ -65,6 +70,7 @@
6570
import com.cloud.exception.AgentControlChannelException;
6671
import com.cloud.resource.ServerResource;
6772
import com.cloud.utils.PropertiesUtil;
73+
import com.cloud.utils.StringUtils;
6874
import com.cloud.utils.backoff.BackoffAlgorithm;
6975
import com.cloud.utils.concurrency.NamedThreadFactory;
7076
import com.cloud.utils.exception.CloudRuntimeException;
@@ -121,6 +127,7 @@ public int value() {
121127
Long _id;
122128

123129
Timer _timer = new Timer("Agent Timer");
130+
Timer hostLBTimer;
124131

125132
List<WatchTask> _watchList = new ArrayList<WatchTask>();
126133
long _sequence = 0;
@@ -144,7 +151,7 @@ public Agent(final IAgentShell shell) {
144151
_shell = shell;
145152
_link = null;
146153

147-
_connection = new NioClient("Agent", _shell.getHost(), _shell.getPort(), _shell.getWorkers(), this);
154+
_connection = new NioClient("Agent", _shell.getNextHost(), _shell.getPort(), _shell.getWorkers(), this);
148155

149156
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
150157

@@ -179,7 +186,7 @@ public Agent(final IAgentShell shell, final int localAgentId, final ServerResour
179186
throw new ConfigurationException("Unable to configure " + _resource.getName());
180187
}
181188

182-
final String host = _shell.getHost();
189+
final String host = _shell.getNextHost();
183190
_connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this);
184191

185192
// ((NioClient)_connection).setBindAddress(_shell.getPrivateIp());
@@ -255,7 +262,7 @@ public void start() {
255262
s_logger.info("Attempted to connect to the server, but received an unexpected exception, trying again...");
256263
}
257264
while (!_connection.isStartup()) {
258-
final String host = _shell.getHost();
265+
final String host = _shell.getNextHost();
259266
_shell.getBackoffAlgorithm().waitBeforeRetry();
260267
_connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this);
261268
s_logger.info("Connecting to host:" + host);
@@ -266,6 +273,7 @@ public void start() {
266273
s_logger.info("Attempted to connect to the server, but received an unexpected exception, trying again...");
267274
}
268275
}
276+
_shell.updateConnectedHost();
269277
}
270278

271279
public void stop(final String reason, final String detail) {
@@ -310,6 +318,17 @@ public void setId(final Long id) {
310318
_shell.setPersistentProperty(getResourceName(), "id", Long.toString(id));
311319
}
312320

321+
private synchronized void scheduleHostLBCheckerTask(final long checkInterval) {
322+
if (hostLBTimer != null) {
323+
hostLBTimer.cancel();
324+
}
325+
if (checkInterval > 0L) {
326+
s_logger.info("Scheduling preferred host timer task with host.lb.interval=" + checkInterval + "ms");
327+
hostLBTimer = new Timer("Host LB Timer");
328+
hostLBTimer.scheduleAtFixedRate(new PreferredHostCheckerTask(), checkInterval, checkInterval);
329+
}
330+
}
331+
313332
public void scheduleWatch(final Link link, final Request request, final long delay, final long period) {
314333
synchronized (_watchList) {
315334
if (s_logger.isDebugEnabled()) {
@@ -332,18 +351,20 @@ protected void cancelTasks() {
332351
_watchList.clear();
333352
}
334353
}
335-
public synchronized void lockStartupTask(final Link link)
336-
{
354+
355+
public synchronized void lockStartupTask(final Link link) {
337356
_startup = new StartupTask(link);
338357
_timer.schedule(_startup, _startupWait);
339358
}
340359

341360
public void sendStartup(final Link link) {
342361
final StartupCommand[] startup = _resource.initialize();
343362
if (startup != null) {
363+
final String msHostList = _shell.getPersistentProperty(null, "host");
344364
final Command[] commands = new Command[startup.length];
345365
for (int i = 0; i < startup.length; i++) {
346366
setupStartupCommand(startup[i]);
367+
startup[i].setMSHostList(msHostList);
347368
commands[i] = startup[i];
348369
}
349370
final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false);
@@ -402,19 +423,23 @@ protected void reconnect(final Link link) {
402423
}
403424
}
404425

405-
link.close();
406-
link.terminated();
426+
if (link != null) {
427+
link.close();
428+
link.terminated();
429+
}
407430

408431
setLink(null);
409432
cancelTasks();
410433

411434
_resource.disconnected();
412435

436+
final String lastConnectedHost = _shell.getConnectedHost();
437+
413438
int inProgress = 0;
414439
do {
415440
_shell.getBackoffAlgorithm().waitBeforeRetry();
416441

417-
s_logger.info("Lost connection to the server. Dealing with the remaining commands...");
442+
s_logger.info("Lost connection to host: " + lastConnectedHost + ". Dealing with the remaining commands...");
418443

419444
inProgress = _inProgress.get();
420445
if (inProgress > 0) {
@@ -434,7 +459,7 @@ protected void reconnect(final Link link) {
434459
_shell.getBackoffAlgorithm().waitBeforeRetry();
435460
}
436461

437-
final String host = _shell.getHost();
462+
final String host = _shell.getNextHost();
438463
do {
439464
_connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this);
440465
s_logger.info("Reconnecting to host:" + host);
@@ -452,7 +477,8 @@ protected void reconnect(final Link link) {
452477
}
453478
_shell.getBackoffAlgorithm().waitBeforeRetry();
454479
} while (!_connection.isStartup());
455-
s_logger.info("Connected to the server");
480+
_shell.updateConnectedHost();
481+
s_logger.info("Connected to the host: " + _shell.getConnectedHost());
456482
}
457483

458484
public void processStartupAnswer(final Answer answer, final Response response, final Link link) {
@@ -554,6 +580,8 @@ protected void processRequest(final Request request, final Link link) {
554580
answer = setupAgentCertificate((SetupCertificateCommand) cmd);
555581
} else if (cmd instanceof SetupDirectDownloadCertificate) {
556582
answer = setupDirectDownloadCertificate((SetupDirectDownloadCertificate) cmd);
583+
} else if (cmd instanceof SetupMSListCommand) {
584+
answer = setupManagementServerList((SetupMSListCommand) cmd);
557585
} else {
558586
if (cmd instanceof ReadyCommand) {
559587
processReadyCommand(cmd);
@@ -708,6 +736,30 @@ private Answer setupAgentCertificate(final SetupCertificateCommand cmd) {
708736
return new SetupCertificateAnswer(true);
709737
}
710738

739+
private void processManagementServerList(final List<String> msList, final String lbAlgorithm, final Long lbCheckInterval) {
740+
if (CollectionUtils.isNotEmpty(msList) && !Strings.isNullOrEmpty(lbAlgorithm)) {
741+
try {
742+
final String newMSHosts = String.format("%s%s%s", StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm);
743+
_shell.setPersistentProperty(null, "host", newMSHosts);
744+
_shell.setHosts(newMSHosts);
745+
_shell.resetHostCounter();
746+
s_logger.info("Processed new management server list: " + newMSHosts);
747+
} catch (final Exception e) {
748+
throw new CloudRuntimeException("Could not persist received management servers list", e);
749+
}
750+
}
751+
if ("shuffle".equals(lbAlgorithm)) {
752+
scheduleHostLBCheckerTask(0);
753+
} else {
754+
scheduleHostLBCheckerTask(_shell.getLbCheckerInterval(lbCheckInterval));
755+
}
756+
}
757+
758+
private Answer setupManagementServerList(final SetupMSListCommand cmd) {
759+
processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
760+
return new SetupMSListAnswer(true);
761+
}
762+
711763
public void processResponse(final Response response, final Link link) {
712764
final Answer answer = response.getAnswer();
713765
if (s_logger.isDebugEnabled()) {
@@ -728,15 +780,16 @@ public void processResponse(final Response response, final Link link) {
728780
}
729781

730782
public void processReadyCommand(final Command cmd) {
731-
732783
final ReadyCommand ready = (ReadyCommand)cmd;
733784

734-
s_logger.info("Proccess agent ready command, agent id = " + ready.getHostId());
785+
s_logger.info("Processing agent ready command, agent id = " + ready.getHostId());
735786
if (ready.getHostId() != null) {
736787
setId(ready.getHostId());
737788
}
738-
s_logger.info("Ready command is processed: agent id = " + getId());
739789

790+
processManagementServerList(ready.getMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval());
791+
792+
s_logger.info("Ready command is processed for agent id = " + getId());
740793
}
741794

742795
public void processOtherTask(final Task task) {
@@ -1018,4 +1071,44 @@ public void doTask(final Task task) throws TaskExecutionException {
10181071
}
10191072
}
10201073
}
1074+
1075+
public class PreferredHostCheckerTask extends ManagedContextTimerTask {
1076+
1077+
@Override
1078+
protected void runInContext() {
1079+
try {
1080+
final String[] msList = _shell.getHosts();
1081+
if (msList == null || msList.length < 1) {
1082+
return;
1083+
}
1084+
final String preferredHost = msList[0];
1085+
final String connectedHost = _shell.getConnectedHost();
1086+
if (s_logger.isTraceEnabled()) {
1087+
s_logger.trace("Running preferred host checker task, connected host=" + connectedHost + ", preferred host=" + preferredHost);
1088+
}
1089+
if (preferredHost != null && !preferredHost.equals(connectedHost) && _link != null) {
1090+
boolean isHostUp = true;
1091+
try (final Socket socket = new Socket()) {
1092+
socket.connect(new InetSocketAddress(preferredHost, _shell.getPort()), 5000);
1093+
} catch (final IOException e) {
1094+
isHostUp = false;
1095+
if (s_logger.isTraceEnabled()) {
1096+
s_logger.trace("Host: " + preferredHost + " is not reachable");
1097+
}
1098+
}
1099+
if (isHostUp && _link != null && _inProgress.get() == 0) {
1100+
if (s_logger.isDebugEnabled()) {
1101+
s_logger.debug("Preferred host " + preferredHost + " is found to be reachable, trying to reconnect");
1102+
}
1103+
_shell.resetHostCounter();
1104+
reconnect(_link);
1105+
}
1106+
}
1107+
} catch (Throwable t) {
1108+
s_logger.error("Error caught while attempting to connect to preferred host", t);
1109+
}
1110+
}
1111+
1112+
}
1113+
10211114
}

agent/src/main/java/com/cloud/agent/AgentShell.java

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import com.cloud.utils.backoff.BackoffAlgorithm;
5151
import com.cloud.utils.backoff.impl.ConstantTimeBackoff;
5252
import com.cloud.utils.exception.CloudRuntimeException;
53+
import com.google.common.base.Strings;
5354

5455
public class AgentShell implements IAgentShell, Daemon {
5556
private static final Logger s_logger = Logger.getLogger(AgentShell.class.getName());
@@ -72,6 +73,9 @@ public class AgentShell implements IAgentShell, Daemon {
7273
private volatile boolean _exit = false;
7374
private int _pingRetries;
7475
private final List<Agent> _agents = new ArrayList<Agent>();
76+
private String hostToConnect;
77+
private String connectedHost;
78+
private Long preferredHostCheckInterval;
7579

7680
public AgentShell() {
7781
}
@@ -107,18 +111,54 @@ public String getPod() {
107111
}
108112

109113
@Override
110-
public String getHost() {
111-
final String[] hosts = _host.split(",");
114+
public String getNextHost() {
115+
final String[] hosts = getHosts();
112116
if (_hostCounter >= hosts.length) {
113117
_hostCounter = 0;
114118
}
115-
final String host = hosts[_hostCounter % hosts.length];
119+
hostToConnect = hosts[_hostCounter % hosts.length];
116120
_hostCounter++;
117-
return host;
121+
return hostToConnect;
118122
}
119123

120-
public void setHost(final String host) {
121-
_host = host;
124+
@Override
125+
public String getConnectedHost() {
126+
return connectedHost;
127+
}
128+
129+
@Override
130+
public long getLbCheckerInterval(final Long receivedLbInterval) {
131+
if (preferredHostCheckInterval != null) {
132+
return preferredHostCheckInterval * 1000L;
133+
}
134+
if (receivedLbInterval != null) {
135+
return receivedLbInterval * 1000L;
136+
}
137+
return 0L;
138+
}
139+
140+
@Override
141+
public void updateConnectedHost() {
142+
connectedHost = hostToConnect;
143+
}
144+
145+
146+
@Override
147+
public void resetHostCounter() {
148+
_hostCounter = 0;
149+
}
150+
151+
@Override
152+
public String[] getHosts() {
153+
return _host.split(",");
154+
}
155+
156+
@Override
157+
public void setHosts(final String host) {
158+
if (!Strings.isNullOrEmpty(host)) {
159+
_host = host.split(hostLbAlgorithmSeparator)[0];
160+
resetHostCounter();
161+
}
122162
}
123163

124164
@Override
@@ -251,7 +291,8 @@ protected boolean parseCommand(final String[] args) throws ConfigurationExceptio
251291
if (host == null) {
252292
host = "localhost";
253293
}
254-
_host = host;
294+
295+
setHosts(host);
255296

256297
if (zone != null)
257298
_zone = zone;
@@ -291,6 +332,9 @@ protected boolean parseCommand(final String[] args) throws ConfigurationExceptio
291332
_properties.setProperty("guid", _guid);
292333
}
293334

335+
String val = getProperty(null, preferredHostIntervalKey);
336+
preferredHostCheckInterval = (Strings.isNullOrEmpty(val) ? null : Long.valueOf(val));
337+
294338
return true;
295339
}
296340

0 commit comments

Comments
 (0)