nodeHost = rtrim($nodeHost, '/'); }

    /**
     * Supplies the crawl configuration consumed by run().
     *
     * Keys read by run(): 'listApi', 'detailApi', 'countApi', 'pageUrl',
     * 'authActions', 'paginationMode' and 'listMethod'.
     *
     * @return array
     */
    abstract protected function getSpiderConfig(): array;

    /**
     * Derives the number of list pages from the first intercepted list response.
     *
     * run() treats a null result as "page count unknown" and any value not
     * greater than 1 as "nothing more to fetch".
     *
     * @param mixed      $listFirstPageData The 'data' entry of the intercepted list API call.
     * @param array|null $countData         The 'data' entry of the intercepted count API call, if any.
     */
    abstract protected function extractListTotalPages($listFirstPageData, $countData = null);

    /**
     * Builds the request parameters for one list page; run() calls this for
     * pages 2 through the total page count.
     *
     * @return array
     */
    abstract protected function buildListPageParams(int $page): array;

    /**
     * Supplies the settings for UI-driven pagination.
     *
     * Keys read by run(): 'nextBtnSelector' and 'waitMs'.
     *
     * @return array
     */
    abstract protected function getUiPaginationConfig(): array;

    /**
     * Converts the collected raw responses into the unified result object.
     *
     * @param mixed $detailData       The intercepted detail API data, or null when none was captured.
     * @param array $allListPagesData One entry per collected list page, first page first.
     */
    abstract protected function parseToUnifiedData($detailData, array $allListPagesData): UnifiedScrmData;

    /**
     * Runs the full crawl: authenticates and intercepts the configured APIs via
     * the node service, collects the remaining list pages (by direct batch
     * fetch or by UI pagination), and hands everything to parseToUnifiedData().
     *
     * Failures while collecting pages after the first one are tolerated: pages
     * that could not be fetched are simply absent from the result.
     *
     * @throws Exception When the node service does not report a successful
     *                   initialisation, or the list API was not intercepted.
     */
    public function run(): UnifiedScrmData
    {
        $config = $this->getSpiderConfig();
        $listApi = (string) ($config['listApi'] ?? '');
        $detailApi = $config['detailApi'] ?? null;
        $countApi = $config['countApi'] ?? null;
        $pageUrl = $config['pageUrl'] ?? null;
        $authActions = $config['authActions'] ?? [];

        $apiUrlsToIntercept = [$listApi];
        if ($detailApi) {
            $apiUrlsToIntercept[] = $detailApi;
        }
        if ($countApi) {
            $apiUrlsToIntercept[] = $countApi;
        }

        $initResult = $this->requestNode('/api/auth-and-intercept', [
            'pageUrl' => $pageUrl,
            'apiUrls' => $apiUrlsToIntercept,
            'authActions' => $authActions,
        ]);
        if (empty($initResult['success'])) {
            throw new Exception('初始化失败: ' . ($initResult['error'] ?? '未知'));
        }

        // A "successful" response may still lack these keys; fall back instead
        // of reading undefined indexes.
        $interceptedApis = $initResult['interceptedApis'] ?? [];
        $cookies = $initResult['cookies'] ?? null;

        if (!isset($interceptedApis[$listApi])) {
            throw new Exception("致命错误:未能拦截到必须的列表接口 [{$listApi}]");
        }

        $detailData = $detailApi && isset($interceptedApis[$detailApi])
            ? ($interceptedApis[$detailApi]['data'] ?? null)
            : null;
        $countData = $countApi && isset($interceptedApis[$countApi])
            ? ($interceptedApis[$countApi]['data'] ?? null)
            : null;

        $listApiNode = $interceptedApis[$listApi];
        $firstPageData = $listApiNode['data'] ?? null;
        $allListPagesData = [$firstPageData];

        $totalPages = $this->extractListTotalPages($firstPageData, $countData);
        $mode = $config['paginationMode'] ?? self::MODE_FETCH;

        if ($totalPages > 1 || $totalPages === null) {
            if ($mode === self::MODE_FETCH && $totalPages !== null) {
                // Direct fetch needs a known page count to enumerate pages 2..N.
                $paramList = [];
                for ($page = 2; $page <= $totalPages; $page++) {
                    $paramList[] = $this->buildListPageParams($page);
                }

                $fetchResult = $this->requestNode('/api/batch-fetch', [
                    'tasks' => [[
                        'apiPath' => $listApi,
                        'fullUrl' => $listApiNode['url'] ?? null,
                        'headers' => $listApiNode['headers'] ?? '',
                        'paramList' => $paramList,
                        'method' => $config['listMethod'] ?? 'GET',
                    ]],
                    'cookies' => $cookies,
                ], 120);

                if (!empty($fetchResult['success'])) {
                    // Guard against a success response without a usable result
                    // list for this API; iterating null would raise a warning.
                    $pageResults = $fetchResult['results'][$listApi] ?? [];
                    if (!is_array($pageResults)) {
                        $pageResults = [];
                    }
                    foreach ($pageResults as $pResult) {
                        if (!empty($pResult['success'])) {
                            $allListPagesData[] = $pResult['data'] ?? null;
                        }
                    }
                }
            } elseif ($mode === self::MODE_UI) {
                $uiConfig = $this->getUiPaginationConfig();
                // Unknown page count: request a very large click budget.
                // NOTE(review): presumably the node service stops once there is
                // no next page to click — confirm against the service.
                $clicksToPerform = ($totalPages === null) ? 9999 : ($totalPages - 1);

                $uiResult = $this->requestNode('/api/ui-pagination', [
                    'apiUrl' => $listApi,
                    'pageUrl' => $pageUrl,
                    'nextBtnSelector' => $uiConfig['nextBtnSelector'] ?? '',
                    'waitMs' => $uiConfig['waitMs'] ?? 2000,
                    'clicksToPerform' => $clicksToPerform,
                    'cookies' => $cookies,
                    'firstPageData' => $firstPageData,
                    'authActions' => $authActions,
                ], 1200);

                if (
                    !empty($uiResult['success'])
                    && !empty($uiResult['data'])
                    && is_array($uiResult['data'])
                ) {
                    foreach ($uiResult['data'] as $pageData) {
                        $allListPagesData[] = $pageData;
                    }
                }
            }
        }

        return $this->parseToUnifiedData($detailData, $allListPagesData);
    }

    /**
     * POSTs a JSON payload to the node service and returns the decoded reply.
     *
     * @param string $endpoint Path appended to the configured node host.
     * @param array  $payload  Data sent as the JSON request body.
     * @param int    $timeout  Overall cURL timeout in seconds.
     *
     * @return array The decoded JSON response, or an empty array when the
     *               response body is not a JSON array/object.
     *
     * @throws Exception When the payload cannot be encoded, the cURL handle
     *                   cannot be created, or the transfer fails.
     */
    protected function requestNode(string $endpoint, array $payload, int $timeout = 60): array
    {
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            // Previously a failed encode silently posted an empty body.
            throw new Exception('请求数据 JSON 编码失败: ' . json_last_error_msg());
        }

        $ch = curl_init($this->nodeHost . $endpoint);
        if ($ch === false) {
            throw new Exception('cURL 初始化失败');
        }

        try {
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
            curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
            curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

            $response = curl_exec($ch);
            if (curl_errno($ch)) {
                throw new Exception(curl_error($ch));
            }
        } finally {
            // Release the handle on both the success and the failure path.
            curl_close($ch);
        }

        $decoded = json_decode((string) $response, true);

        return is_array($decoded) ? $decoded : [];
    }
}