Selenium 与 PhantomJSDriver 整合:加速网站爬取

    xiaoxiao2021-04-15  67

    在使用 PhantomJSDriver 的时候,每次爬取都要重新 start client(重新启动 PhantomJS 服务),导致单次爬取数据耗时在 30 s 左右,非常慢。研究源码之后,我自己做了改造:复用已经启动的 PhantomJS 服务和已有的 session。其中的关键代码已贴出。

    源码: http://git.oschina.net/wds/contact

    package org.openqa.selenium.phantomjs; import java.io.IOException; import java.lang.reflect.Field; import java.util.Map; import org.openqa.selenium.Capabilities; import org.openqa.selenium.Platform; import org.openqa.selenium.remote.CapabilityType; import org.openqa.selenium.remote.DesiredCapabilities; import org.openqa.selenium.remote.DriverCommand; import org.openqa.selenium.remote.MyHttpCommandExecutor; import org.openqa.selenium.remote.RemoteWebDriver; import org.openqa.selenium.remote.Response; import com.google.common.collect.ImmutableMap; public class MyPhantomJSDriver extends PhantomJSDriver { private String mySessionId; private Capabilities desiredCapabilities; private Capabilities requiredCapabilities; private int port; public MyPhantomJSDriver(String mySessionId, int port) { super(port); this.mySessionId = mySessionId; this.port = port; try { startSession(); } catch (RuntimeException e) { try { quit(); } catch (Exception localException1) { } throw e; } } protected void startSession() { if (this.mySessionId != null && !this.mySessionId.isEmpty()) { // // URL driverserver = new URL(localServer); // MyHttpCommandExecutor delegate = new PhantomJSCommandExecutor( // PhantomJSDriverService.createDefaultServiceWithPort(desiredCapabilities, this.port)); // // HttpCommandExecutor(driverserver); // // try { // // TODO: use a more intelligent way of testing if the server is // // ready. 
// delegate.getAddressOfRemoteServer().openConnection().connect(); // super.setCommandExecutor(delegate); // // } catch (IOException e) { // e.printStackTrace(); // } super.setSessionId(this.mySessionId); // Command command = new Command(super.getSessionId(), // DriverCommand.GET_CAPABILITIES); ImmutableMap.Builder<String, Capabilities> paramBuilder = new ImmutableMap.Builder(); paramBuilder.put("desiredCapabilities", desiredCapabilities); if (requiredCapabilities != null) { paramBuilder.put("requiredCapabilities", requiredCapabilities); } Map<String, ?> parameters = paramBuilder.build(); Response response = execute(DriverCommand.GET_CAPABILITIES, parameters); Map<String, Object> rawCapabilities = (Map<String, Object>) response.getValue(); DesiredCapabilities returnedCapabilities = (DesiredCapabilities) super.getCapabilities(); if (returnedCapabilities == null) { returnedCapabilities = new DesiredCapabilities(); } for (Map.Entry<String, Object> entry : rawCapabilities.entrySet()) { // Handle the platform later if (CapabilityType.PLATFORM.equals(entry.getKey())) { continue; } returnedCapabilities.setCapability(entry.getKey(), entry.getValue()); } String platformString = (String) rawCapabilities.get(CapabilityType.PLATFORM); Platform platform; try { if (platformString == null || "".equals(platformString)) { platform = Platform.ANY; } else { platform = Platform.valueOf(platformString); } } catch (IllegalArgumentException e) { // The server probably responded with a name matching the // os.name // system property. Try to recover and parse this. 
platform = Platform.extractFromSysProperty(platformString); } returnedCapabilities.setPlatform(platform); // this.myCapabilities = returnedCapabilities; try { Field f = RemoteWebDriver.class.getDeclaredField("capabilities"); f.setAccessible(true); f.set(this, returnedCapabilities); } catch (Exception e) { e.printStackTrace(); } } else { super.startSession(desiredCapabilities, requiredCapabilities); } } @Override protected void startSession(Capabilities desiredCapabilities, Capabilities requiredCapabilities) { this.desiredCapabilities = desiredCapabilities; this.requiredCapabilities = requiredCapabilities; } } /* This file is part of the GhostDriver by Ivan De Marino <http://ivandemarino.me>. Copyright (c) 2012-2014, Ivan De Marino <http://ivandemarino.me> All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.openqa.selenium.phantomjs;

import java.io.IOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketAddress;

import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.remote.Command;
import org.openqa.selenium.remote.DriverCommand;
import org.openqa.selenium.remote.MyHttpCommandExecutor;
import org.openqa.selenium.remote.Response;

import com.google.common.base.Throwables;

/**
 * A specialized {@link org.openqa.selenium.remote.MyHttpCommandExecutor} that
 * talks to a PhantomJS/GhostDriver server managed by a
 * {@link PhantomJSDriverService}.
 * <p/>
 * On a new-session request the service is started only if nothing is accepting
 * connections on its port yet, so an already running server is reused. The
 * service is stopped after each quit command.
 * <p/>
 * NOTE: Yes, the design of this class is heavily inspired by
 * {@link org.openqa.selenium.chrome.ChromeCommandExecutor}.
 *
 * @author Ivan De Marino <http://ivandemarino.me>
 */
class PhantomJSCommandExecutor extends MyHttpCommandExecutor {

    /** Host on which the server port is probed. */
    private static final String PROBE_HOST = "127.0.0.1";

    /** Maximum time, in milliseconds, to wait for the probe connection. */
    private static final int PROBE_TIMEOUT_MS = 3000;

    /** Prefix of the JDK message for a refused connection; some platforms append detail after it. */
    private static final String CONNECTION_REFUSED = "Connection refused";

    private final PhantomJSDriverService service;

    /**
     * Creates a new PhantomJSCommandExecutor. The PhantomJSCommandExecutor will
     * communicate with the PhantomJS/GhostDriver through the given
     * {@code service}.
     *
     * @param service
     *            The PhantomJSDriverService to send commands to.
     */
    PhantomJSCommandExecutor(PhantomJSDriverService service) {
        super(PhantomJSDriver.getCustomCommands(), service.getUrl());
        this.service = service;
    }

    /**
     * Sends the {@code command} to the PhantomJS/GhostDriver server for
     * execution. When a new session is requested and no server is reachable on
     * the service port, the service is started first. When a session is quit,
     * the service is stopped once the command has completed, whether it
     * succeeded or failed.
     *
     * @param command
     *            The command to execute.
     * @return The command response.
     * @throws WebDriverException
     *             If the service cannot be started, the server has died, or the
     *             command fails with a checked exception.
     */
    @Override
    public Response execute(Command command) {
        // The probe result only matters when a new session is requested, so
        // other commands skip the extra connection attempt.
        if (DriverCommand.NEW_SESSION.equals(command.getName()) && !isServerListening()) {
            startService();
        }

        try {
            return super.execute(command);
        } catch (Throwable t) {
            Throwable rootCause = Throwables.getRootCause(t);
            if (rootCause instanceof ConnectException
                    && isConnectionRefused(rootCause)
                    && !service.isRunning()) {
                throw new WebDriverException("The PhantomJS/GhostDriver server has unexpectedly died!", t);
            }
            Throwables.propagateIfPossible(t);
            throw new WebDriverException(t);
        } finally {
            if (DriverCommand.QUIT.equals(command.getName())) {
                service.stop();
            }
        }
    }

    /** Returns whether something accepts TCP connections on the service port. */
    private boolean isServerListening() {
        Socket probe = new Socket();
        try {
            SocketAddress remoteAddr = new InetSocketAddress(PROBE_HOST, service.getPort());
            probe.connect(remoteAddr, PROBE_TIMEOUT_MS);
            return true;
        } catch (IOException e) {
            // Expected whenever no server is running yet; not worth reporting.
            return false;
        } finally {
            try {
                probe.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the probe socket fails.
            }
        }
    }

    /** Starts the service, reporting a failed start with its cause. */
    private void startService() {
        try {
            service.start();
        } catch (IOException e) {
            throw new WebDriverException(
                    "Could not start the PhantomJS/GhostDriver server on port " + service.getPort(), e);
        }
    }

    /** Null-safe check that the exception message reports a refused connection. */
    private static boolean isConnectionRefused(Throwable cause) {
        String message = cause.getMessage();
        return message != null && message.startsWith(CONNECTION_REFUSED);
    }
}

    转载请注明原文地址: https://ju.6miu.com/read-670767.html

    最新回复(0)