使用过python操作selenium来抓取页面数据后,寻思php是否也一样可以
#项目地址: https://github.com/php-webdriver/php-webdriver
composer require php-webdriver/webdriver
下载的chromedriver可执行文件版本要和本地Chrome浏览器版本一致
查看本地google版本信息
#在chrome输入进行查看 chrome://version/
下载所需文件
# 到下面地址下载与本地Chrome版本对应的chromedriver文件 https://chromedriver.storage.googleapis.com/index.html
./chromedriver --port=4444
declare (strict_types=1);
namespace App\Common\Lib\Mp;
use App\Exception\ScanTimeOutException;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;
/**
 * Simulates a login to the WeChat Official Accounts Platform through a remote
 * WebDriver (chromedriver) session.
 *
 * Intended call order, as far as the methods show:
 * login() -> getLoginQrcode() -> waitScan() -> quit().
 * The caller is responsible for calling quit() to release the browser session.
 */
class LoginSimulation
{
    /** MIME type used for the QR code when the server does not report an image content type. */
    private const DEFAULT_QRCODE_MIME = 'image/png';

    /** Seconds allowed for establishing the connection when downloading the QR code. */
    private const QRCODE_CONNECT_TIMEOUT = 5;

    /** Seconds allowed for the whole QR code download. */
    private const QRCODE_TIMEOUT = 15;

    /** Remote browser session created in the constructor. */
    private RemoteWebDriver $driver;

    /** Page that is opened to start the login. */
    protected string $loginUrl = "https://mp.weixin.qq.com/";

    /**
     * Creates the remote Chrome session.
     *
     * Reads `webdriver.host` and `webdriver.debug` through the project's
     * config() helper (defined outside this file).
     *
     * @throws \RuntimeException when the WebDriver session cannot be created;
     *                           the original exception is attached as "previous"
     */
    public function __construct()
    {
        $host = config('webdriver.host');
        $debug = config('webdriver.debug');

        $options = new ChromeOptions();
        // Production deployments have no display, so run headless unless debugging.
        if (!$debug) {
            $options->addArguments([
                '--no-sandbox',
                '--headless',
                '--disable-gpu',
                '--disable-dev-shm-usage',
            ]);
        }

        try {
            $capabilities = DesiredCapabilities::chrome();
            $capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
            // Third argument: connection timeout in milliseconds
            // (per php-webdriver's RemoteWebDriver::create signature).
            $this->driver = RemoteWebDriver::create($host, $capabilities, 5000);
        } catch (\Exception $e) {
            // Keep the root cause so the failure can be diagnosed from logs.
            throw new \RuntimeException("webdriver初始化失败", 0, $e);
        }
    }

    /**
     * Opens the login page and submits the account credentials.
     *
     * No waiting is done here; whether the submission succeeded is checked by
     * getLoginQrcode().
     *
     * @param string $username account name typed into the "account" field
     * @param string $password password typed into the "password" field
     * @return $this
     */
    public function login(string $username, string $password): self
    {
        $driver = $this->driver;

        // Open the login page and start from a clean cookie jar.
        $driver->get($this->loginUrl);
        $driver->manage()->deleteAllCookies();

        // Switch from QR-code login to account/password login.
        $driver->findElement(WebDriverBy::linkText("使用帐号登录"))->click();

        // Fill in the credentials and submit.
        $driver->findElement(WebDriverBy::name("account"))->sendKeys($username);
        $driver->findElement(WebDriverBy::name("password"))->sendKeys($password);
        $driver->findElement(WebDriverBy::className('btn_login'))->click();

        return $this;
    }

    /**
     * Waits for the post-login QR code and returns it together with its caption.
     *
     * @return array `qrcode` => data URI of the QR image,
     *               `mp_name` => text of the `js_wording` element
     * @throws \RuntimeException when the QR code does not appear within 5 seconds
     *                           or cannot be read/downloaded; the original error
     *                           is attached as "previous"
     */
    public function getLoginQrcode(): array
    {
        $driver = $this->driver;

        try {
            // Wait (5 s, polling every 1000 ms) for the page to move on to the
            // QR code step; this is how wrong credentials are detected.
            // TODO: repeated failed logins trigger a captcha.
            // TODO: add a protection strategy for too many failed logins.
            $driver->wait(5, 1000)->until(
                WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::className("js_qrcode"))
            );

            $qrcode = $driver->findElement(WebDriverBy::className("js_qrcode"))->getAttribute("src");
            $words = $driver->findElement(WebDriverBy::className("js_wording"))->getText();

            return [
                // Inline the image because the URL itself needs the session cookies.
                'qrcode' => $this->loginQrcodeToBase64($qrcode),
                'mp_name' => $words,
            ];
        } catch (\Throwable $e) {
            // The message stays generic for the caller; the real cause is chained.
            throw new \RuntimeException("'您输入的帐号或者密码不正确'或'或失败次数过多出现验证码'或'网络不可达'", 0, $e);
        }
    }

    /**
     * Downloads the login QR code with the browser's cookies and returns it as
     * a base64 data URI.
     *
     * The QR image is only served with the session cookies, so it is fetched
     * server-side and handed to the front end inline.
     *
     * @param string $qrcode URL of the QR code image
     * @return string data URI of the form `data:<mime>;base64,<payload>`
     * @throws \RuntimeException when the image cannot be downloaded
     */
    public function loginQrcodeToBase64(string $qrcode): string
    {
        // Rebuild a Cookie header from the browser session.
        $pairs = [];
        foreach ($this->driver->manage()->getCookies() as $cookie) {
            $fields = $cookie->toArray();
            $pairs[] = $fields['name'] . '=' . $fields['value'];
        }

        $curl = curl_init();
        if ($curl === false) {
            throw new \RuntimeException("登录二维码下载失败: curl初始化失败");
        }

        curl_setopt_array($curl, [
            CURLOPT_URL => $qrcode,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            // Bounded timeouts: a stalled download must not block the request forever.
            CURLOPT_CONNECTTIMEOUT => self::QRCODE_CONNECT_TIMEOUT,
            CURLOPT_TIMEOUT => self::QRCODE_TIMEOUT,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_HTTPHEADER => [
                "cookie:" . implode('; ', $pairs),
            ],
        ]);

        $response = curl_exec($curl);
        $error = curl_error($curl);
        $status = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
        $contentType = curl_getinfo($curl, CURLINFO_CONTENT_TYPE);
        curl_close($curl);

        // curl_exec() returns false on transport errors; never encode that as an image.
        if ($response === false || $response === '') {
            throw new \RuntimeException("登录二维码下载失败: " . ($error !== '' ? $error : 'empty response'));
        }
        if ($status >= 400) {
            throw new \RuntimeException("登录二维码下载失败: HTTP " . $status);
        }

        // Use the MIME type reported by the server (without parameters such as
        // "; charset=...") when it is an image type; otherwise fall back.
        $mime = self::DEFAULT_QRCODE_MIME;
        if (is_string($contentType) && stripos($contentType, 'image/') === 0) {
            $mime = strtolower(trim(explode(';', $contentType)[0]));
        }

        // No chunk_split(): line breaks do not belong in a data URI payload.
        return 'data:' . $mime . ';base64,' . base64_encode($response);
    }

    /**
     * Waits until the QR code has been scanned and the logged-in page is shown.
     *
     * @param int $timeout maximum time to wait, in minutes (multiplied by 60
     *                     before being passed to the driver's wait())
     * @return array `url` => current page URL (the caller still has to parse the
     *               token out of it), `cookie` => list of cookies as arrays
     * @throws ScanTimeOutException when the `menuBar` element does not appear
     *                              in time or any other error occurs while waiting
     */
    public function waitScan(int $timeout = 5): array
    {
        $driver = $this->driver;

        try {
            // Poll every 1500 ms for the element that only exists after login.
            $driver->wait(60 * $timeout, 1500)->until(
                WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::id("menuBar"))
            );

            $indexUrl = $driver->getCurrentURL();

            $cookieArr = [];
            foreach ($driver->manage()->getCookies() as $cookie) {
                $cookieArr[] = $cookie->toArray();
            }

            return [
                'url' => $indexUrl,
                'cookie' => $cookieArr,
            ];
        } catch (\Throwable $e) {
            // NOTE(review): ScanTimeOutException's constructor is not visible here,
            // so the cause is not chained — consider passing $e if it supports it.
            throw new ScanTimeOutException("未有扫码或扫码超时");
        }
    }

    /**
     * Closes the browser and ends the WebDriver session.
     *
     * @return void
     */
    public function quit()
    {
        $this->driver->quit();
    }
}
php-fpm停止运行,重启的时候提示
unable to bind listening socket for address '/tmp/php-cgi-73.sock': No space left on device (28)
对服务器进行升级重启后,发现原来挂载磁盘的路径内容为空
a. 您好,您是重启后磁盘没有挂载吗?您使用df -h
和fdisk -l
命令这边看下
➜ /www df -h
Filesystem Size Used Avail Use% Mounted on
/dev/vda1 40G 2.7G 35G 7% /
devtmpfs 12G 0 12G 0% /dev
tmpfs 12G 0 12G 0% /dev/shm
tmpfs 12G 504K 12G 1% /run
tmpfs 12G 0 12G 0% /sys/fs/cgroup
tmpfs 2.4G 0 2.4G 0% /run/user/1003
➜ /www fdisk -l
Disk /dev/vda: 42.9 GB, 42949672960 bytes, 83886080 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk label type: dos
Disk identifier: 0x000b2d99
Device Boot Start End Blocks Id System
/dev/vda1 * 2048 83875364 41936658+ 83 Linux
Disk /dev/vdb: 21.5 GB, 21474836480 bytes, 41943040 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk label type: dos
Disk identifier: 0xb6083888
Device Boot Start End Blocks Id System
/dev/vdb1 2048 41943039 20970496 83 Linux
a. 您好,这边看您有一个数据盘没有挂载的,您原来这个磁盘是挂载到了哪个目录呢
原来的磁盘挂载到 /www 我现在怕操作错了把数据弄没了,现在如何恢复
a.您好 您使用 mount /dev/vdb1 /www
挂载上试下
出现问题不要慌,不要乱操作以免数据丢失,共勉
阿里云购买服务器,若另外购买数据盘,则此时需要将数据盘挂载到系统盘上
若磁盘是新购买的,或磁盘里面无重要数据按此法来挂载
本次使用的是centOS6.7 64位系统
第一步:处理阿里云磁盘挂载问题
查看磁盘情况,本次机器系统盘为阿里云赠送的20G,数据盘为100G
其中/dev/xvda为系统盘,/dev/xvdb为数据盘,数据盘暂未做任何操作,要使用的话,先做挂载
输入fdisk /dev/xvdb 对数据盘进行分区。根据提示,输入 n, p, 1, 回车,回车, wq
紧接着输入命令mkfs.ext3 /dev/xvdb1 (这边建议使用ext4格式),格式化磁盘,耐心等待即可
接下来挂载此磁盘到目录,挂载目录因人而异,我习惯挂载到/data0下(因为假如以后又买了第二块数据盘,我可以挂载到/data1、/data2……等等以此类推)
输入mkdir /data0,然后输入mount /dev/xvdb1 /data0
然后写入/etc/fstab(开机自动挂载配置):echo '/dev/xvdb1 /data0 ext3 defaults 0 0' >> /etc/fstab
磁盘挂载大功告成,然后输入命令df -h查看,就能看到/dev/xvdb1已经挂载到/data0目录下了,以后cd /data0其实就是访问这块数据盘,而不是系统盘了,我们可以将网站和数据库以及日志等容易占用磁盘空间的文件放在这里