Files
links/application/common/library/scrm/AbstractScrmSpider.php
T
2026-06-09 03:36:30 +08:00

166 lines
5.9 KiB
PHP

<?php
declare(strict_types=1);
namespace app\common\library\scrm;
use Exception;
/**
 * Abstract base class for SCRM "cloud control" spiders.
 *
 * Drives a Node headless-browser service over HTTP: it opens a page, captures
 * the responses of the configured API endpoints, collects the remaining list
 * pages (either by replaying the list request or by clicking a "next" button
 * in the UI) and hands everything to the subclass for normalisation.
 */
abstract class AbstractScrmSpider implements ScrmSpiderInterface
{
    /** Pagination mode: replay the intercepted list request for pages 2..N. */
    public const MODE_FETCH = 'fetch';

    /** Pagination mode: let the headless browser click through the pages. */
    public const MODE_UI = 'ui_click';

    /** Request timeout (seconds) for the batch-fetch endpoint. */
    private const BATCH_FETCH_TIMEOUT = 120;

    /** Request timeout (seconds) for the UI pagination endpoint. */
    private const UI_PAGINATION_TIMEOUT = 1200;

    /** Number of "next" clicks requested when the total page count is unknown. */
    private const UI_MAX_CLICKS_WHEN_UNKNOWN = 9999;

    /** Base URL of the Node service, without a trailing slash. */
    protected string $nodeHost;

    /**
     * @param string $nodeHost Base URL of the Node service; trailing slashes are stripped.
     */
    public function __construct(string $nodeHost = 'http://127.0.0.1:3001')
    {
        $this->nodeHost = rtrim($nodeHost, '/');
    }

    /**
     * Returns the spider configuration consumed by run().
     *
     * Keys read by this class: `pageUrl` and `listApi` (required), and the
     * optional `detailApi`, `countApi`, `authActions` (defaults to []),
     * `paginationMode` (defaults to MODE_FETCH) and `listMethod` (defaults to 'GET').
     *
     * @return array<string, mixed>
     */
    abstract protected function getSpiderConfig(): array;

    /**
     * Determines how many list pages exist.
     *
     * run() treats a null result as "unknown" (only UI pagination can proceed
     * in that case) and any other result as an integer page count.
     *
     * @param mixed                     $listFirstPageData Intercepted data of the first list page
     * @param array<string, mixed>|null $countData         Intercepted data of the count API, if configured and captured
     * @return int|null
     */
    abstract protected function extractListTotalPages($listFirstPageData, $countData = null);

    /**
     * Builds the request parameters for one list page; run() calls this for
     * every page from 2 up to the total page count in MODE_FETCH.
     *
     * @return array<string, mixed>
     */
    abstract protected function buildListPageParams(int $page): array;

    /**
     * Returns the UI pagination settings. Keys read by this class:
     * `nextBtnSelector` (defaults to '') and `waitMs` (defaults to 2000).
     *
     * @return array<string, mixed>
     */
    abstract protected function getUiPaginationConfig(): array;

    /**
     * Converts the raw intercepted payloads into the unified result object.
     *
     * @param mixed             $detailData       Intercepted detail API data, or null when unavailable
     * @param array<int, mixed> $allListPagesData Data of every collected list page, first page at index 0
     */
    abstract protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData;

    /**
     * Runs the full crawl: authenticate and intercept, paginate, then parse.
     *
     * Pagination is best-effort: a failed batch-fetch / UI-pagination response
     * or a failed individual page is skipped, and whatever was collected is
     * still passed to parseToUnifiedData().
     *
     * @throws Exception When the configuration is incomplete, the Node service
     *                   cannot be reached, initialisation fails or the list API
     *                   was not intercepted.
     */
    public function run(): UnifiedScrmData
    {
        $config = $this->getSpiderConfig();

        $listApi = (string) ($config['listApi'] ?? '');
        if ($listApi === '') {
            // Fail before starting a browser session that can never succeed.
            throw new Exception('配置错误:缺少 listApi');
        }
        if (empty($config['pageUrl'])) {
            throw new Exception('配置错误:缺少 pageUrl');
        }

        $detailApi = $config['detailApi'] ?? null;
        $countApi = $config['countApi'] ?? null;

        $apiUrlsToIntercept = [$listApi];
        if ($detailApi) {
            $apiUrlsToIntercept[] = $detailApi;
        }
        if ($countApi) {
            $apiUrlsToIntercept[] = $countApi;
        }

        $initResult = $this->requestNode('/api/auth-and-intercept', [
            'pageUrl' => $config['pageUrl'],
            'apiUrls' => $apiUrlsToIntercept,
            'authActions' => $config['authActions'] ?? [],
        ]);
        if (empty($initResult['success'])) {
            throw new Exception('初始化失败: ' . ($initResult['error'] ?? '未知'));
        }

        $interceptedApis = $initResult['interceptedApis'] ?? [];
        $cookies = $initResult['cookies'] ?? [];

        // The list API is mandatory; a missing or malformed entry is fatal.
        if (
            !is_array($interceptedApis)
            || !isset($interceptedApis[$listApi])
            || !is_array($interceptedApis[$listApi])
        ) {
            throw new Exception("致命错误:未能拦截到必须的列表接口 [{$listApi}]");
        }

        $listApiNode = $interceptedApis[$listApi];
        $firstPageData = $listApiNode['data'] ?? null;

        // Detail and count APIs are optional: fall back to null when not captured.
        $detailData = $detailApi ? ($interceptedApis[$detailApi]['data'] ?? null) : null;
        $countData = $countApi ? ($interceptedApis[$countApi]['data'] ?? null) : null;

        $totalPages = $this->extractListTotalPages($firstPageData, $countData);
        if ($totalPages !== null) {
            // The abstract method is untyped; normalise so that the comparisons
            // and the page loop below always operate on an integer.
            $totalPages = (int) $totalPages;
        }

        $mode = $config['paginationMode'] ?? self::MODE_FETCH;
        $allListPagesData = [$firstPageData];

        if ($totalPages === null || $totalPages > 1) {
            $extraPages = [];
            if ($mode === self::MODE_FETCH && $totalPages !== null) {
                $extraPages = $this->fetchRemainingPagesViaApi($config, $listApi, $listApiNode, $totalPages, $cookies);
            } elseif ($mode === self::MODE_UI) {
                $extraPages = $this->fetchRemainingPagesViaUi($config, $listApi, $firstPageData, $totalPages, $cookies);
            }
            // Any other combination (fetch mode with an unknown page count, or an
            // unrecognised mode) collects nothing beyond the first page.
            $allListPagesData = array_merge($allListPagesData, $extraPages);
        }

        return $this->parseToUnifiedData($detailData, $allListPagesData);
    }

    /**
     * Replays the intercepted list request for pages 2..$totalPages through
     * the batch-fetch endpoint and returns the data of each successful page.
     *
     * @param array<string, mixed> $config      Spider configuration
     * @param array<string, mixed> $listApiNode Intercepted list API entry (`url`, `headers`, `data`)
     * @param mixed                $cookies     Cookies returned by the initialisation call
     * @return array<int, mixed> Sequential list of page data; empty when the batch call failed
     * @throws Exception On transport failure
     */
    private function fetchRemainingPagesViaApi(
        array $config,
        string $listApi,
        array $listApiNode,
        int $totalPages,
        $cookies
    ): array {
        $paramList = [];
        for ($page = 2; $page <= $totalPages; $page++) {
            $paramList[] = $this->buildListPageParams($page);
        }

        $fetchResult = $this->requestNode('/api/batch-fetch', [
            'tasks' => [[
                'apiPath' => $listApi,
                'fullUrl' => $listApiNode['url'] ?? null,
                'headers' => $listApiNode['headers'] ?? '',
                'paramList' => $paramList,
                'method' => $config['listMethod'] ?? 'GET',
            ]],
            'cookies' => $cookies,
        ], self::BATCH_FETCH_TIMEOUT);

        if (empty($fetchResult['success'])) {
            return [];
        }

        // Guard against a response that lacks results for this API.
        $pageResults = $fetchResult['results'][$listApi] ?? [];
        if (!is_array($pageResults)) {
            return [];
        }

        $pages = [];
        foreach ($pageResults as $pageResult) {
            if (
                is_array($pageResult)
                && !empty($pageResult['success'])
                && array_key_exists('data', $pageResult)
            ) {
                $pages[] = $pageResult['data'];
            }
        }

        return $pages;
    }

    /**
     * Collects further list pages by asking the Node service to click the
     * "next" button and capture each resulting list response.
     *
     * @param array<string, mixed> $config        Spider configuration
     * @param mixed                $firstPageData Data of the already intercepted first page
     * @param int|null             $totalPages    Total page count, or null when unknown
     * @param mixed                $cookies       Cookies returned by the initialisation call
     * @return array<int, mixed> Sequential list of page data; empty when the call failed or returned nothing
     * @throws Exception On transport failure
     */
    private function fetchRemainingPagesViaUi(
        array $config,
        string $listApi,
        $firstPageData,
        ?int $totalPages,
        $cookies
    ): array {
        $uiConfig = $this->getUiPaginationConfig();

        // With an unknown page count, request a very large number of clicks.
        $clicksToPerform = $totalPages === null
            ? self::UI_MAX_CLICKS_WHEN_UNKNOWN
            : $totalPages - 1;

        $uiResult = $this->requestNode('/api/ui-pagination', [
            'apiUrl' => $listApi,
            'pageUrl' => $config['pageUrl'],
            'nextBtnSelector' => $uiConfig['nextBtnSelector'] ?? '',
            'waitMs' => $uiConfig['waitMs'] ?? 2000,
            'clicksToPerform' => $clicksToPerform,
            'cookies' => $cookies,
            'firstPageData' => $firstPageData,
            'authActions' => $config['authActions'] ?? [],
        ], self::UI_PAGINATION_TIMEOUT);

        if (
            empty($uiResult['success'])
            || empty($uiResult['data'])
            || !is_array($uiResult['data'])
        ) {
            return [];
        }

        return array_values($uiResult['data']);
    }

    /**
     * Sends a JSON POST request to the Node service and decodes the response.
     *
     * A response body that is not a JSON array/object yields an empty array,
     * which callers treat as a failed call.
     *
     * @param string               $endpoint Path appended to the Node host, e.g. '/api/batch-fetch'
     * @param array<string, mixed> $payload  Request body, JSON-encoded before sending
     * @param int                  $timeout  Total request timeout in seconds
     * @return array<string, mixed>
     * @throws Exception When the payload cannot be encoded, cURL cannot be
     *                   initialised or the transfer fails
     */
    protected function requestNode(string $endpoint, array $payload, int $timeout = 60): array
    {
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            // Never send a request with a broken body.
            throw new Exception('请求参数 JSON 编码失败: ' . json_last_error_msg());
        }

        $ch = curl_init($this->nodeHost . $endpoint);
        if ($ch === false) {
            throw new Exception('cURL 初始化失败');
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
                CURLOPT_TIMEOUT => $timeout,
            ]);

            $response = curl_exec($ch);
            if ($response === false || curl_errno($ch) !== 0) {
                throw new Exception(curl_error($ch));
            }
        } finally {
            // Release the handle on both the success and the failure path.
            curl_close($ch);
        }

        $decoded = json_decode((string) $response, true);

        return is_array($decoded) ? $decoded : [];
    }
}