场景
使用过python操作selenium来抓取页面数据后,寻思php是否也一样可以
安装webdriver
#项目地址: https://github.com/php-webdriver/php-webdriver
composer require php-webdriver/webdriver
下载对应的 ChromeDriver 可执行文件
下载的 ChromeDriver 版本要和本地 Chrome 浏览器的版本一致
查看本地 Chrome 版本信息
#在chrome输入进行查看 chrome://version/
下载所需文件
# 到下面地址下载对应的chrome文件 https://chromedriver.storage.googleapis.com/index.html
启动 ChromeDriver 可执行文件
./chromedriver --port=4444
实例代码
declare (strict_types=1);
namespace App\Common\Lib\Mp;
use App\Exception\ScanTimeOutException;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;
/**
 * Simulates a login to the WeChat Official Accounts Platform by driving a
 * Chrome browser through a remote WebDriver (ChromeDriver/Selenium) server.
 *
 * The intended call order appears to be:
 * login() -> getLoginQrcode() -> waitScan() -> quit().
 */
class LoginSimulation
{
    /** Timeout, in milliseconds, for establishing the WebDriver session. */
    private const CONNECT_TIMEOUT_MS = 5000;

    /** Upper bound, in seconds, for downloading the QR code image. */
    private const QRCODE_DOWNLOAD_TIMEOUT = 30;

    /** @var RemoteWebDriver Browser session shared by every step of the flow. */
    private RemoteWebDriver $driver;

    protected string $loginUrl = "https://mp.weixin.qq.com/";

    /**
     * Opens the WebDriver session.
     *
     * Reads `webdriver.host` (server URL) and `webdriver.debug` from the
     * application config. Unless debug is truthy, Chrome is started headless.
     *
     * @throws \RuntimeException when the session cannot be created; the
     *                           underlying exception is attached as `previous`.
     */
    public function __construct()
    {
        $host = config('webdriver.host');
        $debug = config('webdriver.debug');

        $options = new ChromeOptions();
        // Production deployments have no display, so run headless unless debugging.
        if (!$debug) {
            $options->addArguments([
                '--no-sandbox',
                '--headless',
                '--disable-gpu',
                '--disable-dev-shm-usage',
            ]);
        }

        try {
            $capabilities = DesiredCapabilities::chrome();
            $capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
            $this->driver = RemoteWebDriver::create($host, $capabilities, self::CONNECT_TIMEOUT_MS);
        } catch (\Exception $e) {
            // Keep the original message for callers, but chain the root cause.
            throw new \RuntimeException("webdriver初始化失败", 0, $e);
        }
    }

    /**
     * Opens the login page and submits the account/password form.
     *
     * @param string $username Account name typed into the `account` field.
     * @param string $password Password typed into the `password` field.
     * @return $this
     */
    public function login(string $username, string $password): self
    {
        $driver = $this->driver;

        // Load the login page, then drop any cookies left in the session.
        $driver->get($this->loginUrl);
        $driver->manage()->deleteAllCookies();

        // Switch from the default view to the account/password form.
        $driver->findElement(WebDriverBy::linkText("使用帐号登录"))->click();

        // Fill in the credentials and submit.
        $driver->findElement(WebDriverBy::name("account"))->sendKeys($username);
        $driver->findElement(WebDriverBy::name("password"))->sendKeys($password);
        $driver->findElement(WebDriverBy::className('btn_login'))->click();

        return $this;
    }

    /**
     * Waits for the post-login QR code and returns it as a data URI.
     *
     * @return array{qrcode: string, mp_name: string} `qrcode` is a base64 data
     *         URI; `mp_name` is the text of the `js_wording` element.
     * @throws \RuntimeException when the QR code does not show up within 5
     *                           seconds or cannot be read/downloaded; the
     *                           underlying error is attached as `previous`.
     */
    public function getLoginQrcode(): array
    {
        $driver = $this->driver;
        try {
            // Wait up to 5s (polling every 1000ms) for the page to move on to
            // the QR code step, which only happens when the credentials pass.
            // TODO: repeated failed logins trigger a captcha.
            // TODO: add a protection strategy for too many failed logins.
            $driver->wait(5, 1000)->until(
                WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::className("js_qrcode"))
            );

            $qrcode = $driver->findElement(WebDriverBy::className("js_qrcode"))->getAttribute("src");
            $words = $driver->findElement(WebDriverBy::className("js_wording"))->getText();

            return [
                // The image URL needs the session cookies, so inline it as base64.
                'qrcode' => $this->loginQrcodeToBase64($qrcode),
                'mp_name' => $words,
            ];
        } catch (\Throwable $e) {
            throw new \RuntimeException(
                "'您输入的帐号或者密码不正确'或'或失败次数过多出现验证码'或'网络不可达'",
                0,
                $e
            );
        }
    }

    /**
     * Downloads the QR code image with the browser's cookies and encodes it
     * as a `data:<mime>;base64,<payload>` URI.
     *
     * The image is only served to requests carrying the session cookies, so
     * it is fetched server-side and handed to the front end inline.
     *
     * @param string $qrcode URL of the QR code image.
     * @return string Data URI containing the image.
     * @throws \RuntimeException when the image cannot be downloaded.
     */
    public function loginQrcodeToBase64(string $qrcode): string
    {
        // Rebuild the Cookie header from the cookies held by the browser session.
        $pairs = [];
        foreach ($this->driver->manage()->getCookies() as $cookie) {
            $fields = $cookie->toArray();
            $pairs[] = $fields['name'] . '=' . $fields['value'];
        }

        $curl = curl_init();
        if ($curl === false) {
            throw new \RuntimeException('登录二维码下载失败: curl 初始化失败');
        }

        curl_setopt_array($curl, [
            CURLOPT_URL => $qrcode,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            // A finite timeout: 0 would let a stalled download block forever.
            CURLOPT_TIMEOUT => self::QRCODE_DOWNLOAD_TIMEOUT,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_HTTPHEADER => [
                'Cookie: ' . implode('; ', $pairs),
            ],
        ]);

        $response = curl_exec($curl);
        $error = curl_error($curl);
        $contentType = curl_getinfo($curl, CURLINFO_CONTENT_TYPE);
        curl_close($curl);

        // curl_exec() returns false on failure; never feed that to base64_encode().
        if (!is_string($response) || $response === '') {
            throw new \RuntimeException('登录二维码下载失败: ' . $error);
        }

        $mime = $this->detectImageMime($response, is_string($contentType) ? $contentType : null);

        // No chunk_split(): line breaks do not belong inside a data URI payload.
        return 'data:' . $mime . ';base64,' . base64_encode($response);
    }

    /**
     * Picks the MIME type for the downloaded image.
     *
     * Prefers the server's Content-Type when it is an image type, otherwise
     * sniffs the PNG/GIF/JPEG signatures from the leading bytes.
     *
     * @param string      $bytes       Raw image bytes.
     * @param string|null $contentType Content-Type reported by the server, if any.
     * @return string MIME type such as `image/png`.
     */
    private function detectImageMime(string $bytes, ?string $contentType): string
    {
        if ($contentType !== null && stripos($contentType, 'image/') === 0) {
            // Drop parameters such as "; charset=binary".
            return strtolower(trim(explode(';', $contentType)[0]));
        }
        if (strncmp($bytes, "\x89PNG", 4) === 0) {
            return 'image/png';
        }
        if (strncmp($bytes, 'GIF8', 4) === 0) {
            return 'image/gif';
        }
        if (strncmp($bytes, "\xFF\xD8\xFF", 3) === 0) {
            return 'image/jpeg';
        }

        // Unknown signature: fall back to a generic image type (best guess).
        return 'image/png';
    }

    /**
     * Blocks until the QR code has been scanned and the logged-in page
     * (identified by the `menuBar` element) is present.
     *
     * @param int $timeout Maximum wait in MINUTES (converted to seconds below).
     * @return array{url: string, cookie: array} `url` is the current page URL
     *         (callers need to parse the token out of it); `cookie` is the
     *         list of session cookies, each converted with `toArray()`.
     * @throws ScanTimeOutException when the wait fails for any reason.
     */
    public function waitScan(int $timeout = 5): array
    {
        $driver = $this->driver;
        try {
            // Poll every 1500ms for up to $timeout minutes.
            $driver->wait(60 * $timeout, 1500)->until(
                WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::id("menuBar"))
            );

            // The token still has to be parsed out of this URL by the caller.
            $indexUrl = $driver->getCurrentURL();

            $cookieArr = [];
            foreach ($driver->manage()->getCookies() as $cookie) {
                $cookieArr[] = $cookie->toArray();
            }

            return [
                'url' => $indexUrl,
                'cookie' => $cookieArr,
            ];
        } catch (\Throwable $e) {
            throw new ScanTimeOutException("未有扫码或扫码超时");
        }
    }

    /**
     * Ends the WebDriver session and closes the browser.
     */
    public function quit()
    {
        $this->driver->quit();
    }
}