2026-06-09 03:36:30 +08:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
namespace app\common\library\scrm;
|
|
|
|
|
|
2026-06-10 06:44:57 +08:00
|
|
|
use app\common\service\SplitTicketSyncLogger;
|
2026-06-09 03:36:30 +08:00
|
|
|
use Exception;
|
|
|
|
|
|
|
|
|
|
/**
 * Abstract base class for cloud-control (SCRM) spiders.
 *
 * Talks to a Node headless service over HTTP to intercept API responses on a page,
 * page through the list API, and hand every collected page to the subclass for cleaning.
 */
abstract class AbstractScrmSpider implements ScrmSpiderInterface
{
    /** Pagination mode: remaining pages are requested through the Node `/api/batch-fetch` endpoint. */
    public const MODE_FETCH = 'fetch';

    /** Pagination mode: remaining pages are collected through the Node `/api/ui-pagination` endpoint. */
    public const MODE_UI = 'ui_click';

    /** Click budget sent to the UI paginator when the total page count is unknown. */
    private const UI_CLICKS_WHEN_TOTAL_UNKNOWN = 9999;

    /** Payload keys whose values are replaced before the payload is written to the log. */
    private const SENSITIVE_PAYLOAD_KEYS = ['cookies', 'authActions', 'headers'];

    /** Base URL of the Node service, stored without a trailing slash. */
    protected string $nodeHost;

    /**
     * @param string $nodeHost Base URL of the Node service; trailing slashes are stripped.
     */
    public function __construct(string $nodeHost = 'http://127.0.0.1:3001')
    {
        $this->nodeHost = rtrim($nodeHost, '/');
    }

    /**
     * Spider configuration consumed by run().
     *
     * Keys read by run(): `pageUrl`, `listApi`, `detailApi`, `countApi`, `authActions`,
     * `paginationMode` (defaults to MODE_FETCH) and `listMethod` (defaults to 'GET').
     *
     * @return array<string, mixed>
     */
    abstract protected function getSpiderConfig(): array;

    /**
     * Work out how many list pages exist.
     *
     * run() treats a `null` result as "unknown": in MODE_UI it clicks until the click budget
     * is exhausted, in MODE_FETCH no further pages are requested.
     *
     * @param mixed                     $listFirstPageData `data` of the intercepted list API response
     * @param array<string, mixed>|null $countData         `data` of the intercepted count API, when available
     * @return int|null
     */
    abstract protected function extractListTotalPages($listFirstPageData, $countData = null);

    /**
     * Build the request parameters for one list page; run() calls this for pages 2..N.
     *
     * @return array<string, mixed>
     */
    abstract protected function buildListPageParams(int $page): array;

    /**
     * UI pagination settings; run() reads `nextBtnSelector` and `waitMs` (default 2000).
     *
     * @return array<string, mixed>
     */
    abstract protected function getUiPaginationConfig(): array;

    /**
     * Convert the raw intercepted payloads into the unified result object.
     *
     * @param mixed             $detailData       `data` of the intercepted detail API, or null
     * @param array<int, mixed> $allListPagesData First page followed by every additional page collected
     */
    abstract protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData;

    /**
     * Run the whole crawl: authenticate and intercept, paginate, then parse.
     *
     * Pagination problems are logged rather than thrown, so a partial crawl still
     * returns whatever pages were collected.
     *
     * @throws Exception When the Node service reports an init failure, the list API was
     *                   not intercepted, or the HTTP request to Node fails.
     */
    public function run(): UnifiedScrmData
    {
        $config = $this->getSpiderConfig();
        $listApi = (string) ($config['listApi'] ?? '');
        $detailApi = $config['detailApi'] ?? null;
        $countApi = $config['countApi'] ?? null;
        $mode = $config['paginationMode'] ?? self::MODE_FETCH;

        SplitTicketSyncLogger::log('spider', 'run start', [
            'nodeHost' => $this->nodeHost,
            'pageUrl' => $config['pageUrl'] ?? '',
            'listApi' => $config['listApi'] ?? '',
            'paginationMode' => $mode,
        ]);

        // The list API is always intercepted; detail/count only when configured.
        $apiUrlsToIntercept = [$listApi];
        foreach ([$detailApi, $countApi] as $optionalApi) {
            if ($optionalApi) {
                $apiUrlsToIntercept[] = $optionalApi;
            }
        }

        $initResult = $this->requestNode('/api/auth-and-intercept', [
            'pageUrl' => $config['pageUrl'] ?? null,
            'apiUrls' => $apiUrlsToIntercept,
            'authActions' => $config['authActions'] ?? [],
        ]);

        if (empty($initResult['success'])) {
            $initError = $initResult['error'] ?? '未知';
            SplitTicketSyncLogger::log('spider', 'auth-and-intercept failed', [
                'error' => $initError,
            ]);

            throw new Exception('初始化失败: ' . $initError);
        }

        // A "successful" response without a usable map is treated as "nothing intercepted"
        // so the list-API check below reports it instead of array_keys() failing on null.
        $interceptedApis = $initResult['interceptedApis'] ?? [];
        if (!is_array($interceptedApis)) {
            $interceptedApis = [];
        }
        SplitTicketSyncLogger::log('spider', 'auth-and-intercept ok', [
            'intercepted' => array_keys($interceptedApis),
        ]);
        $cookies = $initResult['cookies'] ?? null;

        if (!isset($interceptedApis[$listApi]) || !is_array($interceptedApis[$listApi])) {
            throw new Exception("致命错误:未能拦截到必须的列表接口 [{$listApi}]");
        }

        $detailData = $detailApi && isset($interceptedApis[$detailApi])
            ? ($interceptedApis[$detailApi]['data'] ?? null)
            : null;
        $countData = $countApi && isset($interceptedApis[$countApi])
            ? ($interceptedApis[$countApi]['data'] ?? null)
            : null;

        $listApiNode = $interceptedApis[$listApi];
        $firstPageData = $listApiNode['data'] ?? null;
        $allListPagesData = [$firstPageData];

        $totalPages = $this->extractListTotalPages($firstPageData, $countData);
        SplitTicketSyncLogger::log('spider', 'pagination plan', [
            'totalPages' => $totalPages,
            'mode' => $mode,
        ]);

        if ($totalPages === null || $totalPages > 1) {
            $extraPages = [];
            if ($mode === self::MODE_FETCH) {
                if ($totalPages === null) {
                    // Batch fetch needs a known page count; make the truncation visible.
                    SplitTicketSyncLogger::log('spider', 'pagination skipped', [
                        'reason' => 'total page count unknown in fetch mode',
                    ]);
                } else {
                    $extraPages = $this->collectPagesViaBatchFetch(
                        $config,
                        $listApi,
                        $listApiNode,
                        $totalPages,
                        $cookies
                    );
                }
            } elseif ($mode === self::MODE_UI) {
                $extraPages = $this->collectPagesViaUiClicks($config, $listApi, $firstPageData, $totalPages, $cookies);
            } else {
                SplitTicketSyncLogger::log('spider', 'pagination skipped', [
                    'reason' => 'unsupported pagination mode',
                    'mode' => $mode,
                ]);
            }

            foreach ($extraPages as $pageData) {
                $allListPagesData[] = $pageData;
            }
        }

        $result = $this->parseToUnifiedData($detailData, $allListPagesData);
        SplitTicketSyncLogger::log('spider', 'run done', [
            'todayNewCount' => $result->todayNewCount,
            'totalOnline' => $result->totalOnline,
            'totalOffline' => $result->totalOffline,
            'total' => $result->total,
            'numberCount' => count($result->numbers),
        ]);

        return $result;
    }

    /**
     * Request pages 2..$totalPages in one `/api/batch-fetch` call.
     *
     * @param array<string, mixed> $config      Spider configuration (reads `listMethod`)
     * @param array<string, mixed> $listApiNode Intercepted list API entry (reads `url`, `headers`)
     * @param int                  $totalPages  Known total page count (> 1)
     * @param mixed                $cookies     Cookies returned by the init call, passed through as-is
     * @return array<int, mixed> `data` of every page that succeeded; empty when the batch failed
     * @throws Exception When the HTTP request to Node fails.
     */
    private function collectPagesViaBatchFetch(
        array $config,
        string $listApi,
        array $listApiNode,
        $totalPages,
        $cookies
    ): array {
        $paramList = [];
        for ($page = 2; $page <= $totalPages; $page++) {
            $paramList[] = $this->buildListPageParams($page);
        }

        $fetchResult = $this->requestNode('/api/batch-fetch', [
            'tasks' => [[
                'apiPath' => $listApi,
                'fullUrl' => $listApiNode['url'] ?? null,
                'headers' => $listApiNode['headers'] ?? '',
                'paramList' => $paramList,
                'method' => $config['listMethod'] ?? 'GET',
            ]],
            'cookies' => $cookies,
        ], 120);

        if (empty($fetchResult['success'])) {
            SplitTicketSyncLogger::log('spider', 'batch-fetch failed', [
                'error' => $fetchResult['error'] ?? null,
                'requestedPages' => count($paramList),
            ]);

            return [];
        }

        // The per-API result list may be missing or malformed even on success.
        $pageResults = $fetchResult['results'][$listApi] ?? [];
        if (!is_array($pageResults)) {
            $pageResults = [];
        }

        $pages = [];
        foreach ($pageResults as $pageResult) {
            if (is_array($pageResult) && !empty($pageResult['success'])) {
                $pages[] = $pageResult['data'] ?? null;
            }
        }

        if (count($pages) !== count($paramList)) {
            SplitTicketSyncLogger::log('spider', 'batch-fetch incomplete', [
                'requestedPages' => count($paramList),
                'collectedPages' => count($pages),
            ]);
        }

        return $pages;
    }

    /**
     * Collect the remaining pages through `/api/ui-pagination`.
     *
     * @param array<string, mixed> $config        Spider configuration (reads `pageUrl`, `authActions`)
     * @param mixed                $firstPageData `data` of the intercepted first list page, sent along to Node
     * @param int|null             $totalPages    Total page count, or null when unknown
     * @param mixed                $cookies       Cookies returned by the init call, passed through as-is
     * @return array<int, mixed> Page payloads returned by Node; empty on failure
     * @throws Exception When the HTTP request to Node fails.
     */
    private function collectPagesViaUiClicks(
        array $config,
        string $listApi,
        $firstPageData,
        $totalPages,
        $cookies
    ): array {
        $uiConfig = $this->getUiPaginationConfig();
        $clicksToPerform = $totalPages === null
            ? self::UI_CLICKS_WHEN_TOTAL_UNKNOWN
            : $totalPages - 1;

        $uiResult = $this->requestNode('/api/ui-pagination', [
            'apiUrl' => $listApi,
            'pageUrl' => $config['pageUrl'] ?? null,
            'nextBtnSelector' => $uiConfig['nextBtnSelector'] ?? '',
            'waitMs' => $uiConfig['waitMs'] ?? 2000,
            'clicksToPerform' => $clicksToPerform,
            'cookies' => $cookies,
            'firstPageData' => $firstPageData,
            'authActions' => $config['authActions'] ?? [],
        ], 1200);

        if (empty($uiResult['success'])) {
            SplitTicketSyncLogger::log('spider', 'ui-pagination failed', [
                'error' => $uiResult['error'] ?? null,
                'clicksToPerform' => $clicksToPerform,
            ]);

            return [];
        }

        $pages = $uiResult['data'] ?? [];
        if (!is_array($pages)) {
            SplitTicketSyncLogger::log('spider', 'ui-pagination returned non-array data', [
                'type' => gettype($pages),
            ]);

            return [];
        }

        return array_values($pages);
    }

    /**
     * POST a JSON payload to the Node service and decode the JSON response.
     *
     * Both the request and the response summary are logged; sensitive payload values
     * are redacted in the log only, the request body itself is sent unchanged.
     *
     * @param string               $endpoint Path appended to the Node host, e.g. `/api/batch-fetch`
     * @param array<string, mixed> $payload  Request body, sent as JSON
     * @param int                  $timeout  Total cURL timeout in seconds
     * @return array<string, mixed> Decoded response, or an empty array when the body is not a JSON array/object
     * @throws Exception When the payload cannot be encoded, cURL cannot be initialised,
     *                   or the transfer fails.
     */
    protected function requestNode(string $endpoint, array $payload, int $timeout = 60): array
    {
        $url = $this->nodeHost . $endpoint;
        $started = microtime(true);
        SplitTicketSyncLogger::log('node_request', 'POST ' . $endpoint, [
            'url' => $url,
            'timeout' => $timeout,
            'payload' => self::redactPayloadForLog($payload),
        ]);

        // json_encode() returns false on e.g. invalid UTF-8; fail loudly instead of posting an empty body.
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            throw new Exception('Failed to encode Node request payload: ' . json_last_error_msg());
        }

        $ch = curl_init($url);
        if ($ch === false) {
            throw new Exception('Failed to initialise cURL for ' . $url);
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
                CURLOPT_TIMEOUT => $timeout,
            ]);

            $response = curl_exec($ch);
            $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $elapsedMs = (int) round((microtime(true) - $started) * 1000);
            $transferError = curl_errno($ch) ? curl_error($ch) : null;
        } finally {
            // Release the handle on every path, including exceptions thrown above.
            curl_close($ch);
        }

        if ($transferError !== null) {
            SplitTicketSyncLogger::log('node_response', 'curl error on ' . $endpoint, [
                'httpCode' => $httpCode,
                'elapsedMs' => $elapsedMs,
                'error' => $transferError,
            ]);

            throw new Exception($transferError);
        }

        $rawBody = (string) $response;
        $decoded = json_decode($rawBody, true);
        $summary = is_array($decoded)
            ? self::summarizeNodeResponse($decoded)
            : ['raw' => mb_substr($rawBody, 0, 300, 'UTF-8')];

        SplitTicketSyncLogger::log('node_response', 'POST ' . $endpoint, array_merge([
            'httpCode' => $httpCode,
            'elapsedMs' => $elapsedMs,
            'responseSize' => strlen($rawBody),
        ], $summary));

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Copy a request payload for logging, replacing the values of sensitive keys at any depth.
     *
     * @param array<int|string, mixed> $payload
     * @return array<int|string, mixed>
     */
    private static function redactPayloadForLog(array $payload): array
    {
        $safe = [];
        foreach ($payload as $key => $value) {
            if (is_string($key) && in_array($key, self::SENSITIVE_PAYLOAD_KEYS, true)) {
                $safe[$key] = '[redacted]';
                continue;
            }

            $safe[$key] = is_array($value) ? self::redactPayloadForLog($value) : $value;
        }

        return $safe;
    }

    /**
     * Reduce a decoded Node response to a compact summary for logging.
     *
     * Always carries `success` and `error`; adds the keys of `interceptedApis` and
     * `results` and the element count of `data` when those entries are arrays.
     *
     * @param array<string, mixed> $decoded
     * @return array<string, mixed>
     */
    private static function summarizeNodeResponse(array $decoded): array
    {
        $summary = [
            'success' => $decoded['success'] ?? null,
            'error' => $decoded['error'] ?? null,
        ];

        $interceptedApis = $decoded['interceptedApis'] ?? null;
        if (is_array($interceptedApis)) {
            $summary['interceptedApis'] = array_keys($interceptedApis);
        }

        $results = $decoded['results'] ?? null;
        if (is_array($results)) {
            $summary['resultApis'] = array_keys($results);
        }

        $data = $decoded['data'] ?? null;
        if (is_array($data)) {
            $summary['dataPages'] = count($data);
        }

        return $summary;
    }
}
|