完整版V1 加入爬虫功能

This commit is contained in:
root
2026-06-09 03:36:30 +08:00
parent 34d76cce74
commit a68b83fcbd
69 changed files with 5058 additions and 56 deletions
@@ -0,0 +1,233 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm;
use app\common\service\SplitTicketSyncLogger;
use Exception;
/**
 * Base class for SCRM ("cloud control") spiders that delegate browser work to a
 * Node headless service: intercept the target APIs, page through the list, then
 * normalise the collected responses.
 *
 * Subclasses describe the target site and map raw responses to UnifiedScrmData;
 * run() drives the whole flow.
 */
abstract class AbstractScrmSpider implements ScrmSpiderInterface
{
    public const MODE_FETCH = 'fetch';
    public const MODE_UI = 'ui_click';

    /** Number of "next page" clicks requested in UI mode when the page count is unknown. */
    private const UI_MAX_CLICKS = 9999;

    /** Placeholder written to the log in place of secret values. */
    private const REDACTED = '***';

    /** Base URL of the Node service, stored without a trailing slash. */
    protected string $nodeHost;

    /**
     * @param string $nodeHost Base URL of the Node service; trailing slashes are stripped
     */
    public function __construct(string $nodeHost = 'http://127.0.0.1:3001')
    {
        $this->nodeHost = rtrim($nodeHost, '/');
    }

    /**
     * Site description consumed by run().
     *
     * Keys read by run(): pageUrl, listApi, detailApi (optional), countApi (optional),
     * authActions (optional), paginationMode (optional, defaults to MODE_FETCH) and
     * listMethod (optional, defaults to 'GET').
     *
     * @return array<string, mixed>
     */
    abstract protected function getSpiderConfig(): array;

    /**
     * Work out how many list pages exist from the intercepted first page.
     *
     * No native return type is declared so that existing subclasses stay compatible.
     *
     * @param mixed                     $listFirstPageData 'data' of the intercepted list response
     * @param array<string, mixed>|null $countData         'data' of the intercepted count response, if any
     * @return int|null page count; null means "unknown" (UI mode then clicks up to UI_MAX_CLICKS
     *                  times, fetch mode skips pagination)
     */
    abstract protected function extractListTotalPages($listFirstPageData, $countData = null);

    /**
     * Request parameters for one list page; run() calls this for pages 2..N in fetch mode.
     *
     * @return array<string, mixed>
     */
    abstract protected function buildListPageParams(int $page): array;

    /**
     * UI pagination settings; run() reads nextBtnSelector and waitMs (default 2000).
     *
     * @return array<string, mixed>
     */
    abstract protected function getUiPaginationConfig(): array;

    /**
     * Convert the collected raw responses into the unified result.
     *
     * @param mixed             $detailData       'data' of the detail API response, or null
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page, first page first
     */
    abstract protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData;

    /**
     * Authenticate, intercept the configured APIs, collect the remaining list pages and
     * return the parsed result.
     *
     * Pagination is best-effort: a failed or partial pagination request is logged and the
     * pages collected so far are still parsed.
     *
     * @throws Exception when the Node request fails, initialisation is unsuccessful or the
     *                   list API was not intercepted
     */
    public function run(): UnifiedScrmData
    {
        $config = $this->getSpiderConfig();
        $listApi = (string) ($config['listApi'] ?? '');
        $detailApi = $config['detailApi'] ?? null;
        $countApi = $config['countApi'] ?? null;
        $mode = $config['paginationMode'] ?? self::MODE_FETCH;

        SplitTicketSyncLogger::log('spider', 'run start', [
            'nodeHost' => $this->nodeHost,
            'pageUrl' => $config['pageUrl'] ?? '',
            'listApi' => $listApi,
            'paginationMode' => $mode,
        ]);

        // The list API is mandatory; detail and count APIs are intercepted only when configured.
        $apiUrlsToIntercept = [$listApi];
        foreach ([$detailApi, $countApi] as $optionalApi) {
            if ($optionalApi) {
                $apiUrlsToIntercept[] = $optionalApi;
            }
        }

        $initResult = $this->requestNode('/api/auth-and-intercept', [
            'pageUrl' => $config['pageUrl'] ?? null,
            'apiUrls' => $apiUrlsToIntercept,
            'authActions' => $config['authActions'] ?? [],
        ]);
        if (empty($initResult['success'])) {
            $reason = $initResult['error'] ?? '未知';
            SplitTicketSyncLogger::log('spider', 'auth-and-intercept failed', [
                'error' => $reason,
            ]);
            throw new Exception('初始化失败: ' . $reason);
        }

        // A reply without a usable map falls through to the "list API not intercepted"
        // exception below instead of failing inside array_keys().
        $interceptedApis = $initResult['interceptedApis'] ?? [];
        if (!is_array($interceptedApis)) {
            $interceptedApis = [];
        }
        SplitTicketSyncLogger::log('spider', 'auth-and-intercept ok', [
            'intercepted' => array_keys($interceptedApis),
        ]);
        $cookies = $initResult['cookies'] ?? null;

        if (!isset($interceptedApis[$listApi])) {
            throw new Exception("致命错误:未能拦截到必须的列表接口 [{$listApi}]");
        }

        $detailData = $detailApi ? ($interceptedApis[$detailApi]['data'] ?? null) : null;
        $countData = $countApi ? ($interceptedApis[$countApi]['data'] ?? null) : null;

        $listApiNode = $interceptedApis[$listApi];
        $firstPageData = $listApiNode['data'] ?? null;
        $allListPagesData = [$firstPageData];
        $totalPages = $this->extractListTotalPages($firstPageData, $countData);

        SplitTicketSyncLogger::log('spider', 'pagination plan', [
            'totalPages' => $totalPages,
            'mode' => $mode,
        ]);

        if ($totalPages === null || $totalPages > 1) {
            $extraPages = [];
            if ($mode === self::MODE_FETCH) {
                if ($totalPages === null) {
                    // Fetch mode needs a known page count to build the parameter list.
                    SplitTicketSyncLogger::log('spider', 'pagination skipped: unknown page count in fetch mode', [
                        'listApi' => $listApi,
                    ]);
                } else {
                    $extraPages = $this->collectPagesByFetch($config, $listApi, $listApiNode, $totalPages, $cookies);
                }
            } elseif ($mode === self::MODE_UI) {
                $extraPages = $this->collectPagesByUi($config, $listApi, $firstPageData, $totalPages, $cookies);
            }
            foreach ($extraPages as $pageData) {
                $allListPagesData[] = $pageData;
            }
        }

        $result = $this->parseToUnifiedData($detailData, $allListPagesData);
        SplitTicketSyncLogger::log('spider', 'run done', [
            'todayNewCount' => $result->todayNewCount,
            'totalOnline' => $result->totalOnline,
            'totalOffline' => $result->totalOffline,
            'total' => $result->total,
            'numberCount' => count($result->numbers),
        ]);

        return $result;
    }

    /**
     * Fetch pages 2..$totalPages through the Node batch-fetch endpoint.
     *
     * @param array<string, mixed> $config      spider configuration
     * @param mixed                $listApiNode intercepted list API entry (url, headers, data)
     * @param mixed                $totalPages  known page count
     * @param mixed                $cookies     cookies returned by auth-and-intercept
     * @return array<int, mixed> 'data' of every page reported as successful
     * @throws Exception when the Node request itself fails
     */
    private function collectPagesByFetch(array $config, string $listApi, $listApiNode, $totalPages, $cookies): array
    {
        $paramList = [];
        for ($page = 2; $page <= $totalPages; $page++) {
            $paramList[] = $this->buildListPageParams($page);
        }

        $fetchResult = $this->requestNode('/api/batch-fetch', [
            'tasks' => [[
                'apiPath' => $listApi,
                'fullUrl' => $listApiNode['url'] ?? null,
                'headers' => $listApiNode['headers'] ?? '',
                'paramList' => $paramList,
                'method' => $config['listMethod'] ?? 'GET',
            ]],
            'cookies' => $cookies,
        ], 120);

        if (empty($fetchResult['success'])) {
            SplitTicketSyncLogger::log('spider', 'batch-fetch failed', [
                'error' => $fetchResult['error'] ?? null,
                'requestedPages' => count($paramList),
            ]);
            return [];
        }

        $pageResults = $fetchResult['results'][$listApi] ?? [];
        if (!is_array($pageResults)) {
            $pageResults = [];
        }

        $pages = [];
        foreach ($pageResults as $pageResult) {
            if (!empty($pageResult['success'])) {
                $pages[] = $pageResult['data'] ?? null;
            }
        }

        if (count($pages) < count($paramList)) {
            SplitTicketSyncLogger::log('spider', 'batch-fetch incomplete', [
                'requestedPages' => count($paramList),
                'receivedPages' => count($pages),
            ]);
        }

        return $pages;
    }

    /**
     * Collect further pages by letting the Node service click the "next" button.
     *
     * @param array<string, mixed> $config        spider configuration
     * @param mixed                $firstPageData 'data' of the already intercepted first page
     * @param mixed                $totalPages    known page count, or null when unknown
     * @param mixed                $cookies       cookies returned by auth-and-intercept
     * @return array<int, mixed> page data returned by the Node service
     * @throws Exception when the Node request itself fails
     */
    private function collectPagesByUi(array $config, string $listApi, $firstPageData, $totalPages, $cookies): array
    {
        $uiConfig = $this->getUiPaginationConfig();
        $clicksToPerform = ($totalPages === null) ? self::UI_MAX_CLICKS : ($totalPages - 1);

        $uiResult = $this->requestNode('/api/ui-pagination', [
            'apiUrl' => $listApi,
            'pageUrl' => $config['pageUrl'] ?? null,
            'nextBtnSelector' => $uiConfig['nextBtnSelector'] ?? '',
            'waitMs' => $uiConfig['waitMs'] ?? 2000,
            'clicksToPerform' => $clicksToPerform,
            'cookies' => $cookies,
            'firstPageData' => $firstPageData,
            'authActions' => $config['authActions'] ?? [],
        ], 1200);

        if (empty($uiResult['success'])) {
            SplitTicketSyncLogger::log('spider', 'ui-pagination failed', [
                'error' => $uiResult['error'] ?? null,
                'clicksToPerform' => $clicksToPerform,
            ]);
            return [];
        }

        $pages = $uiResult['data'] ?? [];

        return is_array($pages) ? array_values($pages) : [];
    }

    /**
     * POST a JSON payload to the Node service and return the decoded JSON reply.
     *
     * The request is logged with secrets masked; the payload sent over the wire is unchanged.
     * A reply that is not a JSON object/array yields an empty array.
     *
     * @param array<string, mixed> $payload
     * @return array<string, mixed>
     * @throws Exception when the payload cannot be encoded or the HTTP transfer fails
     */
    protected function requestNode(string $endpoint, array $payload, int $timeout = 60): array
    {
        $url = $this->nodeHost . $endpoint;
        $started = microtime(true);
        SplitTicketSyncLogger::log('node_request', 'POST ' . $endpoint, [
            'url' => $url,
            'timeout' => $timeout,
            'payload' => self::redactPayloadForLog($payload),
        ]);

        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            throw new Exception('Node request encode failed: ' . json_last_error_msg());
        }

        $ch = curl_init($url);
        if ($ch === false) {
            throw new Exception('Node request init failed: ' . $url);
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
                CURLOPT_TIMEOUT => $timeout,
            ]);
            $response = curl_exec($ch);
            $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $errNo = curl_errno($ch);
            $errMsg = curl_error($ch);
        } finally {
            // Release the handle on every path, including exceptions.
            curl_close($ch);
        }
        $elapsedMs = (int) round((microtime(true) - $started) * 1000);

        if ($errNo !== 0 || $response === false) {
            SplitTicketSyncLogger::log('node_response', 'curl error on ' . $endpoint, [
                'httpCode' => $httpCode,
                'elapsedMs' => $elapsedMs,
                'error' => $errMsg,
            ]);
            throw new Exception($errMsg !== '' ? $errMsg : 'Node request failed: ' . $endpoint);
        }

        $raw = (string) $response;
        $decoded = json_decode($raw, true);
        $summary = is_array($decoded)
            ? self::summarizeNodeResponse($decoded)
            : ['raw' => mb_substr($raw, 0, 300, 'UTF-8')];
        SplitTicketSyncLogger::log('node_response', 'POST ' . $endpoint, array_merge([
            'httpCode' => $httpCode,
            'elapsedMs' => $elapsedMs,
            'responseSize' => strlen($raw),
        ], $summary));

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Copy of a request payload that is safe to log: cookies, auth-action input values and
     * captured request headers are masked.
     *
     * NOTE(review): whether SplitTicketSyncLogger masks secrets itself is not visible here,
     * so the masking is done at the call site.
     *
     * @param array<string, mixed> $payload
     * @return array<string, mixed>
     */
    private static function redactPayloadForLog(array $payload): array
    {
        if (isset($payload['cookies'])) {
            $payload['cookies'] = self::REDACTED;
        }

        if (isset($payload['authActions']) && is_array($payload['authActions'])) {
            foreach ($payload['authActions'] as $index => $action) {
                // 'value' carries typed-in credentials such as the page password.
                if (is_array($action) && array_key_exists('value', $action)) {
                    $payload['authActions'][$index]['value'] = self::REDACTED;
                }
            }
        }

        if (isset($payload['tasks']) && is_array($payload['tasks'])) {
            foreach ($payload['tasks'] as $index => $task) {
                // Headers are replayed from the intercepted request and may hold auth tokens.
                if (is_array($task) && !empty($task['headers'])) {
                    $payload['tasks'][$index]['headers'] = self::REDACTED;
                }
            }
        }

        return $payload;
    }

    /**
     * Reduce a decoded Node reply to the fields worth logging (status, error and the
     * keys/sizes of the bulky sections) so response bodies stay out of the log.
     *
     * @param array<string, mixed> $decoded
     * @return array<string, mixed>
     */
    private static function summarizeNodeResponse(array $decoded): array
    {
        $summary = [
            'success' => $decoded['success'] ?? null,
            'error' => $decoded['error'] ?? null,
        ];
        if (isset($decoded['interceptedApis']) && is_array($decoded['interceptedApis'])) {
            $summary['interceptedApis'] = array_keys($decoded['interceptedApis']);
        }
        if (isset($decoded['results']) && is_array($decoded['results'])) {
            $summary['resultApis'] = array_keys($decoded['results']);
        }
        if (isset($decoded['data']) && is_array($decoded['data'])) {
            $summary['dataPages'] = count($decoded['data']);
        }

        return $summary;
    }
}
@@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm;
/**
 * Common contract for SCRM ("cloud control") spiders.
 */
interface ScrmSpiderInterface
{
    /**
     * Run the scrape and return the result in the unified data structure.
     *
     * @return UnifiedScrmData the normalised scrape result
     * @throws \Exception when the scrape cannot be completed
     */
    public function run(): UnifiedScrmData;
}
@@ -0,0 +1,50 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm;
/**
 * Unified result structure returned by every SCRM ("cloud control") spider.
 */
class UnifiedScrmData
{
    /** @var int New contacts today (completed count) */
    public int $todayNewCount = 0;

    /** @var int Count of numbers recorded as online */
    public int $totalOnline = 0;

    /** @var int Count of numbers recorded as offline */
    public int $totalOffline = 0;

    /** @var array<int, array{number:string,status:string,newFollowersToday:int}> */
    public array $numbers = [];

    /** @var int Total count of numbers; set by the caller, not maintained by addNumber() */
    public int $total = 0;

    /**
     * Record one number and bump the matching online/offline counter.
     *
     * Blank numbers (empty after trimming) are ignored, and a negative follower count
     * is stored as 0.
     *
     * @param string $number            the number / account identifier
     * @param bool   $isOnline          whether the number is online
     * @param int    $newFollowersToday new inbound contacts today
     */
    public function addNumber(string $number, bool $isOnline, int $newFollowersToday = 0): void
    {
        $normalized = trim($number);
        if ($normalized !== '') {
            $status = 'offline';
            if ($isOnline) {
                $status = 'online';
                $this->totalOnline += 1;
            } else {
                $this->totalOffline += 1;
            }

            $this->numbers[] = [
                'number' => $normalized,
                'status' => $status,
                'newFollowersToday' => $newFollowersToday < 0 ? 0 : $newFollowersToday,
            ];
        }
    }
}
@@ -0,0 +1,98 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm\spider;
use app\common\library\scrm\AbstractScrmSpider;
use app\common\library\scrm\UnifiedScrmData;
/**
 * Spider for the A2C cloud-control platform.
 */
class A2cSpider extends AbstractScrmSpider
{
    private const API_LIST = '/api/talk/counter/share/record/list';
    private const API_DETAILS = '/api/talk/counter/share/detail';
    private const DEFAULT_PER_PAGE_COUNT = 20;

    private string $pageUrl;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $account;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $password;
    /** Accumulator for the current run; replaced at the start of every run. */
    private UnifiedScrmData $unifiedData;

    /**
     * @param string $pageUrl  share page to open
     * @param string $account  account name (currently unused)
     * @param string $password page password (currently unused)
     * @param string $nodeHost base URL of the Node service
     */
    public function __construct(
        string $pageUrl,
        string $account = '',
        string $password = '',
        string $nodeHost = 'http://127.0.0.1:3001'
    ) {
        parent::__construct($nodeHost);
        $this->pageUrl = $pageUrl;
        $this->account = $account;
        $this->password = $password;
        $this->unifiedData = new UnifiedScrmData();
    }

    /**
     * @return array<string, mixed> site description: list + detail API, UI pagination, a 2s settle wait
     */
    protected function getSpiderConfig(): array
    {
        return [
            'pageUrl' => $this->pageUrl,
            'listApi' => self::API_LIST,
            'detailApi' => self::API_DETAILS,
            'listMethod' => 'POST',
            'paginationMode' => self::MODE_UI,
            'authActions' => [
                ['type' => 'wait', 'ms' => 2000],
            ],
        ];
    }

    /**
     * Derive the page count from data.total of the first list page and record the total.
     *
     * @param mixed $listFirstPageData 'data' of the intercepted list response
     * @param mixed $countData         unused
     * @return int page count, never less than 1
     */
    protected function extractListTotalPages($listFirstPageData, $countData = null)
    {
        // AbstractScrmSpider::run() calls this once per run before parseToUnifiedData(),
        // so resetting here keeps a second run() on the same instance from double-counting.
        $this->unifiedData = new UnifiedScrmData();

        $total = (int) ($listFirstPageData['data']['total'] ?? 0);
        $this->unifiedData->total = $total;

        return max(1, (int) ceil($total / self::DEFAULT_PER_PAGE_COUNT));
    }

    /**
     * @return array<string, mixed> page / pageSize parameters for the list API
     */
    protected function buildListPageParams(int $page): array
    {
        return ['page' => $page, 'pageSize' => self::DEFAULT_PER_PAGE_COUNT];
    }

    /**
     * @return array<string, mixed> next-button selector and wait time for UI pagination
     */
    protected function getUiPaginationConfig(): array
    {
        return [
            'nextBtnSelector' => '.btn-next',
            'waitMs' => 2000,
        ];
    }

    /**
     * Map data.rows of every list page to numbers; today's total comes from the detail API.
     *
     * A row is online when numberStatus equals 1. Rows without an account are skipped.
     *
     * @param mixed             $detailData       'data' of the detail response, or null
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page
     */
    protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData
    {
        $unifiedData = $this->unifiedData;
        if ($detailData) {
            $unifiedData->todayNewCount = (int) ($detailData['data']['newFollowersToday'] ?? 0);
        }

        foreach ($allListPagesData as $pageRaw) {
            $records = $pageRaw['data']['rows'] ?? [];
            if (!is_array($records)) {
                continue;
            }
            foreach ($records as $item) {
                if (!is_array($item) || empty($item['account'])) {
                    continue;
                }
                $isOnline = isset($item['numberStatus']) && (int) $item['numberStatus'] === 1;
                $unifiedData->addNumber(
                    (string) $item['account'],
                    $isOnline,
                    (int) ($item['newFollowersToday'] ?? 0)
                );
            }
        }

        return $unifiedData;
    }
}
@@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm\spider;
use app\common\library\scrm\AbstractScrmSpider;
use app\common\library\scrm\UnifiedScrmData;
/**
 * Spider for the Haiwang cloud-control platform.
 */
class HaiwangSpider extends AbstractScrmSpider
{
    private const API_LIST = '/webApi/accountshow/list';
    private const DEFAULT_PER_PAGE_COUNT = 10;

    private string $pageUrl;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $account;
    /** Typed into the page's password input during authentication. */
    private string $password;
    /** Accumulator for the current run; replaced at the start of every run. */
    private UnifiedScrmData $unifiedData;

    /**
     * @param string $pageUrl  share page to open
     * @param string $account  account name (currently unused)
     * @param string $password page password
     * @param string $nodeHost base URL of the Node service
     */
    public function __construct(
        string $pageUrl,
        string $account = '',
        string $password = '',
        string $nodeHost = 'http://127.0.0.1:3001'
    ) {
        parent::__construct($nodeHost);
        $this->pageUrl = $pageUrl;
        $this->account = $account;
        $this->password = $password;
        $this->unifiedData = new UnifiedScrmData();
    }

    /**
     * @return array<string, mixed> site description: list API, UI pagination, password entry + Enter
     */
    protected function getSpiderConfig(): array
    {
        return [
            'pageUrl' => $this->pageUrl,
            'listApi' => self::API_LIST,
            'listMethod' => 'POST',
            'paginationMode' => self::MODE_UI,
            'authActions' => [
                ['type' => 'type', 'selector' => 'input[type="password"]', 'value' => $this->password],
                ['type' => 'press', 'key' => 'Enter'],
                ['type' => 'wait', 'ms' => 2000],
            ],
        ];
    }

    /**
     * Derive the page count from data.total of the first list page and record the total.
     *
     * @param mixed $listFirstPageData 'data' of the intercepted list response
     * @param mixed $countData         unused
     * @return int page count, never less than 1
     */
    protected function extractListTotalPages($listFirstPageData, $countData = null)
    {
        // AbstractScrmSpider::run() calls this once per run before parseToUnifiedData(),
        // so resetting here keeps a second run() on the same instance from double-counting.
        $this->unifiedData = new UnifiedScrmData();

        $total = (int) ($listFirstPageData['data']['total'] ?? 0);
        $this->unifiedData->total = $total;

        return max(1, (int) ceil($total / self::DEFAULT_PER_PAGE_COUNT));
    }

    /**
     * @return array<string, mixed> page / limit parameters for the list API
     */
    protected function buildListPageParams(int $page): array
    {
        return ['page' => $page, 'limit' => self::DEFAULT_PER_PAGE_COUNT];
    }

    /**
     * @return array<string, mixed> next-button selector and wait time for UI pagination
     */
    protected function getUiPaginationConfig(): array
    {
        return [
            'nextBtnSelector' => '.btn-next',
            'waitMs' => 2000,
        ];
    }

    /**
     * Map data.items of every list page to numbers; today's total comes from the first
     * page's shareStatistics block when it holds a non-empty value.
     *
     * A row is online when acclist_status equals 2. Rows without acclist_account are skipped.
     *
     * @param mixed             $detailData       unused (no detail API is configured)
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page
     */
    protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData
    {
        $unifiedData = $this->unifiedData;

        foreach ($allListPagesData as $pageRaw) {
            $records = $pageRaw['data']['items'] ?? [];
            if (!is_array($records)) {
                continue;
            }
            foreach ($records as $item) {
                if (!is_array($item) || empty($item['acclist_account'])) {
                    continue;
                }
                $isOnline = isset($item['acclist_status']) && (int) $item['acclist_status'] === 2;
                $unifiedData->addNumber(
                    (string) $item['acclist_account'],
                    $isOnline,
                    (int) ($item['account_statistics_today_effective'] ?? 0)
                );
            }
        }

        $todayEffective = $allListPagesData[0]['data']['shareStatistics']['sharecode_statistics_today_contact_effective'] ?? null;
        if (!empty($todayEffective)) {
            $unifiedData->todayNewCount = (int) $todayEffective;
        }

        return $unifiedData;
    }
}
@@ -0,0 +1,89 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm\spider;
use app\common\library\scrm\AbstractScrmSpider;
use app\common\library\scrm\UnifiedScrmData;
/**
 * Spider for the Huojian ("Rocket") cloud-control platform.
 */
class HuojianSpider extends AbstractScrmSpider
{
    private const API_LIST = '/prod-api1/biz/counter/link/share/';

    private string $pageUrl;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $account;
    /** Filled into the message-box input during authentication. */
    private string $password;
    /** Result of the most recent run. */
    private UnifiedScrmData $unifiedData;

    /**
     * @param string $pageUrl  share page to open
     * @param string $account  account name (currently unused)
     * @param string $password page password
     * @param string $nodeHost base URL of the Node service
     */
    public function __construct(
        string $pageUrl,
        string $account = '',
        string $password = '',
        string $nodeHost = 'http://127.0.0.1:3001'
    ) {
        parent::__construct($nodeHost);
        $this->pageUrl = $pageUrl;
        $this->account = $account;
        $this->password = $password;
        $this->unifiedData = new UnifiedScrmData();
    }

    /**
     * @return array<string, mixed> site description: list API plus the password message-box steps
     */
    protected function getSpiderConfig(): array
    {
        return [
            'pageUrl' => $this->pageUrl,
            'listApi' => self::API_LIST,
            'listMethod' => 'POST',
            'paginationMode' => self::MODE_UI,
            'authActions' => [
                ['type' => 'vue_fill', 'selector' => '.el-message-box__input input', 'value' => $this->password],
                ['type' => 'wait', 'ms' => 500],
                ['type' => 'vue_click', 'selector' => '.el-message-box__btns .el-button--primary'],
                ['type' => 'wait', 'ms' => 2000],
            ],
        ];
    }

    /**
     * Always reports a single page, so the base class performs no pagination.
     *
     * @param mixed $listFirstPageData unused
     * @param mixed $countData         unused
     * @return int always 1
     */
    protected function extractListTotalPages($listFirstPageData, $countData = null)
    {
        return 1;
    }

    /**
     * @return array<string, mixed> always empty
     */
    protected function buildListPageParams(int $page): array
    {
        return [];
    }

    /**
     * @return array<string, mixed> always empty
     */
    protected function getUiPaginationConfig(): array
    {
        return [];
    }

    /**
     * Map data.counterCsAccountVo of every list page to numbers; today's total comes from
     * data.counterWorker.newTodayFriend of the first page.
     *
     * A row is online when accountStatus equals 1. Rows without accountLogin are skipped.
     *
     * @param mixed             $detailData       unused (no detail API is configured)
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page
     */
    protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData
    {
        // Nothing is accumulated before this point, so start from a fresh object; reusing the
        // previous one would double-count when run() is called again on the same instance.
        $unifiedData = new UnifiedScrmData();
        $this->unifiedData = $unifiedData;

        $unifiedData->todayNewCount = (int) ($allListPagesData[0]['data']['counterWorker']['newTodayFriend'] ?? 0);

        foreach ($allListPagesData as $pageRaw) {
            $records = $pageRaw['data']['counterCsAccountVo'] ?? [];
            if (!is_array($records)) {
                continue;
            }
            foreach ($records as $item) {
                if (!is_array($item) || empty($item['accountLogin'])) {
                    continue;
                }
                $isOnline = isset($item['accountStatus']) && (int) $item['accountStatus'] === 1;
                $unifiedData->addNumber(
                    (string) $item['accountLogin'],
                    $isOnline,
                    (int) ($item['newTodayFriend'] ?? 0)
                );
            }
        }

        // Count what was actually stored: addNumber() drops whitespace-only logins, so a
        // separate counter could drift from online + offline.
        $unifiedData->total = count($unifiedData->numbers);

        return $unifiedData;
    }
}
@@ -0,0 +1,102 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm\spider;
use app\common\library\scrm\AbstractScrmSpider;
use app\common\library\scrm\UnifiedScrmData;
/**
 * Spider for the SS cloud-control platform (customer report view).
 */
class SsCustomerSpider extends AbstractScrmSpider
{
    private const API_LIST = '/sys/share/report/get-customer-analysis-dimension-list';
    private const API_DETAILS = '/sys/share/report/get-customer-analysis-statistics';
    private const DEFAULT_PER_PAGE_COUNT = 20;

    private string $pageUrl;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $account;
    /** Typed into the page's password input during authentication. */
    private string $password;
    /** Result of the most recent run. */
    private UnifiedScrmData $unifiedData;

    /**
     * @param string $pageUrl  share page to open
     * @param string $account  account name (currently unused)
     * @param string $password page password
     * @param string $nodeHost base URL of the Node service
     */
    public function __construct(
        string $pageUrl,
        string $account = '',
        string $password = '',
        string $nodeHost = 'http://127.0.0.1:3001'
    ) {
        parent::__construct($nodeHost);
        $this->pageUrl = $pageUrl;
        $this->account = $account;
        $this->password = $password;
        $this->unifiedData = new UnifiedScrmData();
    }

    /**
     * @return array<string, mixed> site description: list + detail API, UI pagination, password
     *                              entry followed by a click on the "social media accounts" button
     */
    protected function getSpiderConfig(): array
    {
        return [
            'pageUrl' => $this->pageUrl,
            'listApi' => self::API_LIST,
            'detailApi' => self::API_DETAILS,
            'listMethod' => 'POST',
            'paginationMode' => self::MODE_UI,
            'authActions' => [
                ['type' => 'wait', 'ms' => 2000],
                ['type' => 'type', 'selector' => 'input[type="password"]', 'value' => $this->password],
                ['type' => 'press', 'key' => 'Enter'],
                ['type' => 'wait', 'ms' => 3000],
                [
                    'type' => 'vue_click',
                    'selector' => 'button[class*="reports-customers__dimension"]',
                    'text' => 'social media accounts',
                ],
                ['type' => 'wait', 'ms' => 3000],
            ],
        ];
    }

    /**
     * Reports the page count as unknown; the base class then paginates through the UI
     * without a known click count.
     *
     * @param mixed $listFirstPageData unused
     * @param mixed $countData         unused
     * @return null always null
     */
    protected function extractListTotalPages($listFirstPageData, $countData = null)
    {
        return null;
    }

    /**
     * @return array<string, mixed> page / pageSize parameters for the list API
     */
    protected function buildListPageParams(int $page): array
    {
        return ['page' => $page, 'pageSize' => self::DEFAULT_PER_PAGE_COUNT];
    }

    /**
     * @return array<string, mixed> next-button selector and wait time for UI pagination
     */
    protected function getUiPaginationConfig(): array
    {
        return [
            'nextBtnSelector' => '.arco-pagination-item-next',
            'waitMs' => 2000,
        ];
    }

    /**
     * Map data.list of every list page to numbers; today's total comes from the detail API.
     *
     * Every row is recorded as online (the code reads no status field). Rows without
     * channel_tag are skipped.
     *
     * @param mixed             $detailData       'data' of the detail response, or null
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page
     */
    protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData
    {
        // Nothing is accumulated before this point, so start from a fresh object; reusing the
        // previous one would double-count when run() is called again on the same instance.
        $unifiedData = new UnifiedScrmData();
        $this->unifiedData = $unifiedData;

        if ($detailData) {
            $unifiedData->todayNewCount = (int) ($detailData['data']['distinct_contacts_total'] ?? 0);
        }

        foreach ($allListPagesData as $pageRaw) {
            $records = $pageRaw['data']['list'] ?? [];
            if (!is_array($records)) {
                continue;
            }
            foreach ($records as $item) {
                if (!is_array($item) || empty($item['channel_tag'])) {
                    continue;
                }
                $unifiedData->addNumber(
                    (string) $item['channel_tag'],
                    true,
                    (int) ($item['distinct_contacts_total'] ?? 0)
                );
            }
        }

        $unifiedData->total = count($unifiedData->numbers);

        return $unifiedData;
    }
}
@@ -0,0 +1,93 @@
<?php
declare(strict_types=1);
namespace app\common\library\scrm\spider;
use app\common\library\scrm\AbstractScrmSpider;
use app\common\library\scrm\UnifiedScrmData;
/**
 * Spider for the Xinghe cloud-control platform.
 */
class XingheSpider extends AbstractScrmSpider
{
    private const API_LIST = '/share/share/api_yinliu_count.html';
    private const DEFAULT_PER_PAGE_COUNT = 10;

    private string $pageUrl;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $account;
    /** Stored for interface parity with the other spiders; not read by this class. */
    private string $password;
    /** Accumulator for the current run; replaced at the start of every run. */
    private UnifiedScrmData $unifiedData;

    /**
     * @param string $pageUrl  share page to open
     * @param string $account  account name (currently unused)
     * @param string $password page password (currently unused)
     * @param string $nodeHost base URL of the Node service
     */
    public function __construct(
        string $pageUrl,
        string $account = '',
        string $password = '',
        string $nodeHost = 'http://127.0.0.1:3001'
    ) {
        parent::__construct($nodeHost);
        $this->pageUrl = $pageUrl;
        $this->account = $account;
        $this->password = $password;
        $this->unifiedData = new UnifiedScrmData();
    }

    /**
     * @return array<string, mixed> site description: GET list API, fetch pagination, a 2s settle wait
     */
    protected function getSpiderConfig(): array
    {
        return [
            'pageUrl' => $this->pageUrl,
            'listApi' => self::API_LIST,
            'listMethod' => 'GET',
            'paginationMode' => self::MODE_FETCH,
            'authActions' => [
                ['type' => 'wait', 'ms' => 2000],
            ],
        ];
    }

    /**
     * Derive the page count from the first page's count field; also records the total and
     * today's total (totalRow.day_sum).
     *
     * @param mixed $listFirstPageData 'data' of the intercepted list response
     * @param mixed $countData         unused
     * @return int page count, never less than 1
     */
    protected function extractListTotalPages($listFirstPageData, $countData = null)
    {
        // AbstractScrmSpider::run() calls this once per run before parseToUnifiedData(),
        // so resetting here keeps a second run() on the same instance from double-counting.
        $this->unifiedData = new UnifiedScrmData();

        $total = (int) ($listFirstPageData['count'] ?? 0);
        $this->unifiedData->total = $total;
        $this->unifiedData->todayNewCount = (int) ($listFirstPageData['totalRow']['day_sum'] ?? 0);

        return max(1, (int) ceil($total / self::DEFAULT_PER_PAGE_COUNT));
    }

    /**
     * @return array<string, mixed> page / limit parameters for the list API
     */
    protected function buildListPageParams(int $page): array
    {
        return ['page' => $page, 'limit' => self::DEFAULT_PER_PAGE_COUNT];
    }

    /**
     * @return array<string, mixed> next-button selector and wait time for UI pagination
     */
    protected function getUiPaginationConfig(): array
    {
        return [
            'nextBtnSelector' => '.layui-laypage-next',
            'waitMs' => 2000,
        ];
    }

    /**
     * Map the data rows of every list page to numbers.
     *
     * A row is online when its online field equals 1. Rows without user are skipped.
     *
     * @param mixed             $detailData       unused (no detail API is configured)
     * @param array<int, mixed> $allListPagesData 'data' of every collected list page
     */
    protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData
    {
        $unifiedData = $this->unifiedData;

        foreach ($allListPagesData as $pageRaw) {
            $records = $pageRaw['data'] ?? [];
            if (!is_array($records)) {
                continue;
            }
            foreach ($records as $item) {
                if (!is_array($item) || empty($item['user'])) {
                    continue;
                }
                $isOnline = isset($item['online']) && (int) $item['online'] === 1;
                $unifiedData->addNumber((string) $item['user'], $isOnline, (int) ($item['day_sum'] ?? 0));
            }
        }

        return $unifiedData;
    }
}