/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.zookeeper;

import static org.apache.zookeeper.client.FourLetterWordMain.send4LetterWord;

import java.io.File;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.BindException;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.common.X509Exception;
import org.apache.zookeeper.server.NIOServerCnxnFactory;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.persistence.FileTxnLog;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs one or more standalone ZooKeeper servers inside the current JVM, each with its own data
 * directory and client port. The first server started is treated as the "active" one; the others
 * are "backups" that take over the active slot when the active server is killed.
 * <p>
 * TODO: Most of the code in this class is ripped from ZooKeeper tests. Instead of redoing it, we
 * should contribute updates to their code which let us more easily access testing helper objects.
 */
@InterfaceAudience.Public
public class MiniZooKeeperCluster {
  private static final Logger LOG = LoggerFactory.getLogger(MiniZooKeeperCluster.class);

  /** Tick time (ms) handed to each server when none was set through {@link #setTickTime(int)}. */
  private static final int TICK_TIME = 2000;

  /** Pause (ms) between two consecutive 'stat' probes while waiting for a server to go up/down. */
  private static final int TIMEOUT = 1000;

  /** Fallback (ms) for {@link #connectionTimeout} when the configuration does not provide one. */
  private static final int DEFAULT_CONNECTION_TIMEOUT = 30000;

  /** How long (ms) to wait for a server to come up or go down before giving up. */
  private int connectionTimeout;

  public static final String LOOPBACK_HOST = InetAddress.getLoopbackAddress().getHostName();
  public static final String HOST = LOOPBACK_HOST;

  /** True between a successful {@link #startup(File, int)} and the next {@link #shutdown()}. */
  private boolean started;

  /**
   * The default port. If zero, we use a random port.
   */
  private int defaultClientPort = 0;

  // The three lists below are kept index-aligned: entry i of each describes the same server.
  private final List<NIOServerCnxnFactory> standaloneServerFactoryList;
  private final List<ZooKeeperServer> zooKeeperServers;
  private final List<Integer> clientPortList;

  /** Index of the active server in the lists above; -1 until the cluster has been started. */
  private int activeZKServerIndex;

  /** User-requested tick time (ms); values &lt;= 0 mean "use {@link #TICK_TIME}". */
  private int tickTime = 0;

  private final Configuration configuration;

  /** Creates a cluster backed by a fresh, default {@link Configuration}. */
  public MiniZooKeeperCluster() {
    this(new Configuration());
  }

  /**
   * Creates a cluster that reads its settings from the given configuration.
   * @param configuration source of the connection timeout, session timeouts, bind address and
   *                      maximum client connections used when servers are started
   */
  public MiniZooKeeperCluster(Configuration configuration) {
    this.started = false;
    this.configuration = configuration;
    activeZKServerIndex = -1;
    zooKeeperServers = new ArrayList<>();
    clientPortList = new ArrayList<>();
    standaloneServerFactoryList = new ArrayList<>();
    connectionTimeout = configuration.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster",
      DEFAULT_CONNECTION_TIMEOUT);
  }

  /**
   * Add a client port to the list.
   * @param clientPort the specified port
   */
  public void addClientPort(int clientPort) {
    clientPortList.add(clientPort);
  }

  /**
   * Get the list of client ports.
   * @return clientPortList the client port list (the live internal list, not a copy)
   */
  @InterfaceAudience.Private
  public List<Integer> getClientPortList() {
    return clientPortList;
  }

  /**
   * Check whether the client port in a specific position of the client port list is valid.
   * @param index the specified position
   * @return true if the list has an entry at {@code index} and that entry is greater than zero
   */
  private boolean hasValidClientPortInList(int index) {
    return (clientPortList.size() > index && clientPortList.get(index) > 0);
  }

  /**
   * Sets the port used as the starting point when a client port has to be selected.
   * @param clientPort the default client port; must be positive
   * @throws IllegalArgumentException if {@code clientPort} is zero or negative
   */
  public void setDefaultClientPort(int clientPort) {
    if (clientPort <= 0) {
      throw new IllegalArgumentException("Invalid default ZK client port: " + clientPort);
    }
    this.defaultClientPort = clientPort;
  }

  /**
   * Selects a ZK client port.
   * @param seedPort the seed port to start with; -1 means first time.
   * @return a client port that is not already present in the client port list
   */
  private int selectClientPort(int seedPort) {
    int returnClientPort = seedPort + 1;
    if (returnClientPort == 0) {
      // If the new port is invalid, find one - starting with the default client port.
      // If the default client port is not specified, starting with a random port.
      // The random port is selected from the range between 49152 to 65535. These ports cannot be
      // registered with IANA and are intended for dynamic allocation (see http://bit.ly/dynports).
      if (defaultClientPort > 0) {
        returnClientPort = defaultClientPort;
      } else {
        returnClientPort = 0xc000 + ThreadLocalRandom.current().nextInt(0x3f00);
      }
    }
    // Skip over every port that is already recorded in the list.
    while (clientPortList.contains(returnClientPort)) {
      returnClientPort++;
    }
    return returnClientPort;
  }

  /**
   * Sets the tick time used for servers started afterwards.
   * @param tickTime tick time in milliseconds; a value &lt;= 0 selects the built-in default
   */
  public void setTickTime(int tickTime) {
    this.tickTime = tickTime;
  }

  /** Returns the number of tracked servers minus one (the active server is not a backup). */
  public int getBackupZooKeeperServerNum() {
    return zooKeeperServers.size() - 1;
  }

  /** Returns the number of servers currently tracked by this cluster. */
  public int getZooKeeperServerNum() {
    return zooKeeperServers.size();
  }

  // / XXX: From o.a.zk.t.ClientBase
  private static void setupTestEnv() {
    // during the tests we run with 100K prealloc in the logs.
    // on windows systems prealloc of 64M was seen to take ~15seconds
    // resulting in test failure (client timeout on first session).
    // set env and directly in order to handle static init/gc issues
    System.setProperty("zookeeper.preAllocSize", "100");
    FileTxnLog.setPreallocSize(100 * 1024);
    // allow all 4 letter words
    System.setProperty("zookeeper.4lw.commands.whitelist", "*");
  }

  /**
   * Starts one server per entry of the client port list, or a single server if the list is empty.
   * @param baseDir the base directory to use
   * @return ClientPort server bound to, -1 if there was a binding problem and we couldn't pick
   *         another port.
   * @throws IOException          if an operation fails during the startup
   * @throws InterruptedException if the thread is interrupted while a server is starting up
   */
  public int startup(File baseDir) throws IOException, InterruptedException {
    int numZooKeeperServers = clientPortList.size();
    if (numZooKeeperServers == 0) {
      numZooKeeperServers = 1; // need at least 1 ZK server for testing
    }
    return startup(baseDir, numZooKeeperServers);
  }

  /**
   * Shuts down whatever is currently running and then starts {@code numZooKeeperServers} servers,
   * each in its own {@code zookeeper_<i>} sub-directory of {@code baseDir}.
   * @param baseDir             the base directory to use
   * @param numZooKeeperServers the number of ZooKeeper servers
   * @return ClientPort server bound to, -1 if there was a binding problem and we couldn't pick
   *         another port.
   * @throws IOException          if an operation fails during the startup
   * @throws InterruptedException if the thread is interrupted while a server is starting up
   */
  public int startup(File baseDir, int numZooKeeperServers)
    throws IOException, InterruptedException {
    if (numZooKeeperServers <= 0) {
      return -1;
    }

    setupTestEnv();
    shutdown();

    // These settings do not change from one server to the next, so read them once.
    final int tickTimeToUse = this.tickTime > 0 ? this.tickTime : TICK_TIME;
    final String bindAddr =
      configuration.get("hbase.zookeeper.property.clientPortAddress", LOOPBACK_HOST);
    final int maxClientCnxns = configuration.getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS,
      HConstants.DEFAULT_ZOOKEEPER_MAX_CLIENT_CNXNS);

    int tentativePort = -1; // the seed port

    // running all the ZK servers
    for (int i = 0; i < numZooKeeperServers; i++) {
      File dir = new File(baseDir, "zookeeper_" + i).getAbsoluteFile();
      createDir(dir);

      // Set up client port - if we have already had a list of valid ports, use it.
      int currentClientPort;
      if (hasValidClientPortInList(i)) {
        currentClientPort = clientPortList.get(i);
      } else {
        tentativePort = selectClientPort(tentativePort); // update the seed
        currentClientPort = tentativePort;
      }

      ZooKeeperServer server = new ZooKeeperServer(dir, dir, tickTimeToUse);
      // Setting {min,max}SessionTimeout defaults to be the same as in Zookeeper
      server.setMinSessionTimeout(
        configuration.getInt("hbase.zookeeper.property.minSessionTimeout", -1));
      server.setMaxSessionTimeout(
        configuration.getInt("hbase.zookeeper.property.maxSessionTimeout", -1));
      NIOServerCnxnFactory standaloneServerFactory;
      while (true) {
        try {
          standaloneServerFactory = new NIOServerCnxnFactory();
          standaloneServerFactory.configure(new InetSocketAddress(bindAddr, currentClientPort),
            maxClientCnxns);
        } catch (BindException e) {
          LOG.debug("Failed binding ZK Server to client port: {}", currentClientPort, e);
          // We're told to use some port but it's occupied, fail
          if (hasValidClientPortInList(i)) {
            return -1;
          }
          // This port is already in use, try to use another.
          tentativePort = selectClientPort(tentativePort);
          currentClientPort = tentativePort;
          continue;
        }
        break;
      }

      // Start up this ZK server. Dump its stats.
      standaloneServerFactory.startup(server);
      LOG.info("Started connectionTimeout={}, dir={}, {}", connectionTimeout, dir,
        getServerConfigurationOnOneLine(server));
      // Runs a 'stat' against the servers.
      if (!waitForServerUp(currentClientPort, connectionTimeout)) {
        Threads.printThreadInfo(System.out, "Why is zk standalone server not coming up?");
        throw new IOException(
          "Waiting for startup of standalone server; " + "server isRunning=" + server.isRunning());
      }

      // We have selected a port as a client port. Update clientPortList if necessary.
      if (clientPortList.size() <= i) { // it is not in the list, add the port
        clientPortList.add(currentClientPort);
      } else if (clientPortList.get(i) <= 0) { // the list has invalid port, update with valid port
        clientPortList.set(i, currentClientPort);
      }

      standaloneServerFactoryList.add(standaloneServerFactory);
      zooKeeperServers.add(server);
    }

    // set the first one to be active ZK; Others are backups
    activeZKServerIndex = 0;
    started = true;
    int clientPort = clientPortList.get(activeZKServerIndex);
    LOG.info("Started MiniZooKeeperCluster and ran 'stat' on client port={}", clientPort);
    return clientPort;
  }

  /**
   * Renders the server's configuration dump as a single comma-separated line.
   * @param server the server whose configuration is dumped
   * @return the text written by {@code server.dumpConf}, with line breaks replaced by ", "
   */
  private String getServerConfigurationOnOneLine(ZooKeeperServer server) {
    StringWriter sw = new StringWriter();
    // Override the println variants so that each would-be line ends with ", " instead of a newline.
    try (PrintWriter pw = new PrintWriter(sw) {
      @Override
      public void println(int x) {
        super.print(x);
        super.print(", ");
      }

      @Override
      public void println(String x) {
        super.print(x);
        super.print(", ");
      }
    }) {
      server.dumpConf(pw);
    }
    return sw.toString();
  }

  /**
   * Creates {@code dir} (and any missing parents) unless it already exists.
   * @param dir the directory to create
   * @throws IOException if the directory could not be created or access to it was denied
   */
  private void createDir(File dir) throws IOException {
    try {
      // mkdirs() also returns false when someone else created the directory in the meantime, so
      // only report a failure if the directory is still missing afterwards.
      if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
        throw new IOException("creating dir: " + dir);
      }
    } catch (SecurityException e) {
      throw new IOException("creating dir: " + dir, e);
    }
  }

  /**
   * Shuts down every running server, closes their databases and, if the cluster had been started,
   * forgets all client ports.
   * @throws IOException if waiting for the shutdown of a server fails
   */
  public void shutdown() throws IOException {
    // shut down all the zk servers
    for (int i = 0; i < standaloneServerFactoryList.size(); i++) {
      NIOServerCnxnFactory standaloneServerFactory = standaloneServerFactoryList.get(i);
      int clientPort = clientPortList.get(i);
      standaloneServerFactory.shutdown();
      if (!waitForServerDown(clientPort, connectionTimeout)) {
        throw new IOException("Waiting for shutdown of standalone server at port=" + clientPort
          + ", timeout=" + this.connectionTimeout);
      }
    }
    standaloneServerFactoryList.clear();

    for (ZooKeeperServer zkServer : zooKeeperServers) {
      // Explicitly close ZKDatabase since ZookeeperServer does not close them
      zkServer.getZKDatabase().close();
    }
    zooKeeperServers.clear();

    // clear everything
    if (started) {
      started = false;
      activeZKServerIndex = 0;
      clientPortList.clear();
      LOG.info("Shutdown MiniZK cluster with all ZK servers");
    }
  }

  /**
   * Kills the active server; the next server in the list, if any, becomes the active one.
   * @return clientPort return clientPort if there is another ZK backup can run when killing the
   *         current active; return -1, if there is no backups.
   * @throws IOException          if waiting for the shutdown of a server fails
   * @throws InterruptedException kept in the signature for callers; the code below does not appear
   *                              to throw it
   */
  public int killCurrentActiveZooKeeperServer() throws IOException, InterruptedException {
    // The bounds check covers the case where the last server was already killed: the cluster is
    // still flagged as started but there is nothing left at the active index.
    if (
      !started || activeZKServerIndex < 0
        || activeZKServerIndex >= standaloneServerFactoryList.size()
    ) {
      return -1;
    }

    // Shutdown the current active one
    NIOServerCnxnFactory standaloneServerFactory =
      standaloneServerFactoryList.get(activeZKServerIndex);
    int clientPort = clientPortList.get(activeZKServerIndex);

    standaloneServerFactory.shutdown();
    if (!waitForServerDown(clientPort, connectionTimeout)) {
      throw new IOException("Waiting for shutdown of standalone server");
    }

    zooKeeperServers.get(activeZKServerIndex).getZKDatabase().close();

    // remove the current active zk server
    standaloneServerFactoryList.remove(activeZKServerIndex);
    clientPortList.remove(activeZKServerIndex);
    zooKeeperServers.remove(activeZKServerIndex);
    LOG.info("Kill the current active ZK servers in the cluster on client port: {}", clientPort);

    if (standaloneServerFactoryList.isEmpty()) {
      // there is no backup servers;
      return -1;
    }
    // The removal shifted the next server into the active slot.
    clientPort = clientPortList.get(activeZKServerIndex);
    LOG.info("Activate a backup zk server in the cluster on client port: {}", clientPort);
    // return the next back zk server's port
    return clientPort;
  }

  /**
   * Kill one back up ZK servers. Does nothing if the cluster is not started or has no backup.
   * @throws IOException          if waiting for the shutdown of a server fails
   * @throws InterruptedException kept in the signature for callers; the code below does not appear
   *                              to throw it
   */
  public void killOneBackupZooKeeperServer() throws IOException, InterruptedException {
    if (!started || activeZKServerIndex < 0 || standaloneServerFactoryList.size() <= 1) {
      return;
    }

    int backupZKServerIndex = activeZKServerIndex + 1;
    // Shutdown the backup right after the current active one
    NIOServerCnxnFactory standaloneServerFactory =
      standaloneServerFactoryList.get(backupZKServerIndex);
    int clientPort = clientPortList.get(backupZKServerIndex);

    standaloneServerFactory.shutdown();
    if (!waitForServerDown(clientPort, connectionTimeout)) {
      throw new IOException("Waiting for shutdown of standalone server");
    }

    zooKeeperServers.get(backupZKServerIndex).getZKDatabase().close();

    // remove this backup zk server
    standaloneServerFactoryList.remove(backupZKServerIndex);
    clientPortList.remove(backupZKServerIndex);
    zooKeeperServers.remove(backupZKServerIndex);
    LOG.info("Kill one backup ZK servers in the cluster on client port: {}", clientPort);
  }

  // XXX: From o.a.zk.t.ClientBase. We just dropped the check for ssl/secure.
  /**
   * Polls the server with 'stat' until the request fails, which is taken to mean it is down.
   * @param port    client port of the server to probe on {@link #HOST}
   * @param timeout maximum time to keep polling, in milliseconds
   * @return true once a probe fails; false if the server still answered when the timeout elapsed
   * @throws IOException if the thread is interrupted while sleeping between probes
   */
  private static boolean waitForServerDown(int port, long timeout) throws IOException {
    long start = System.currentTimeMillis();
    while (true) {
      try {
        send4LetterWord(HOST, port, "stat", false, (int) timeout);
      } catch (IOException | X509Exception.SSLContextException e) {
        // The server no longer answers: that is the condition we are waiting for.
        return true;
      }

      if (System.currentTimeMillis() > start + timeout) {
        break;
      }
      try {
        Thread.sleep(TIMEOUT);
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      }
    }
    return false;
  }

  // XXX: From o.a.zk.t.ClientBase. Its in the test jar but we don't depend on zk test jar.
  // We remove the SSL/secure bit. Not used in here.
  /**
   * Polls the server with 'stat' until it answers with a version banner that is not READ-ONLY.
   * @param port    client port of the server to probe on {@link #HOST}
   * @param timeout maximum time to keep polling, in milliseconds
   * @return true once the server answers as expected; false if the timeout elapsed first
   * @throws IOException if the thread is interrupted while sleeping between probes
   */
  private static boolean waitForServerUp(int port, long timeout) throws IOException {
    long start = System.currentTimeMillis();
    while (true) {
      try {
        String result = send4LetterWord(HOST, port, "stat", false, (int) timeout);
        if (result.startsWith("Zookeeper version:") && !result.contains("READ-ONLY")) {
          return true;
        } else {
          LOG.debug("Read {}", result);
        }
      } catch (ConnectException e) {
        // ignore as this is expected, do not log stacktrace
        LOG.info("{}:{} not up: {}", HOST, port, e.toString());
      } catch (IOException | X509Exception.SSLContextException e) {
        // ignore as this is expected
        LOG.info("{}:{} not up", HOST, port, e);
      }

      if (System.currentTimeMillis() > start + timeout) {
        break;
      }
      try {
        Thread.sleep(TIMEOUT);
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      }
    }
    return false;
  }

  /**
   * Returns the client port of the active server.
   * @return the port at the active index of the client port list, or -1 if that index is outside
   *         the list
   */
  public int getClientPort() {
    return activeZKServerIndex < 0 || activeZKServerIndex >= clientPortList.size()
      ? -1
      : clientPortList.get(activeZKServerIndex);
  }

  /** Returns Address for this cluster instance. */
  public Address getAddress() {
    return Address.fromParts(HOST, getClientPort());
  }

  /** Returns the live internal list of servers tracked by this cluster. */
  List<ZooKeeperServer> getZooKeeperServers() {
    return zooKeeperServers;
  }
}