<?php
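/**
 * @remark :Console command that processes queued GEO question tasks against the enabled AI platforms and stores the results
 * @name   :GeoQuestionRes.php
 * @author :lyh
 * @method :console command (geo_question_result)
 * @time   :2025/7/3 15:13
 */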

namespace App\Console\Commands\Geo;

use App\Helper\Translate;
use App\Models\Geo\GeoLink;
use App\Models\Geo\GeoPlatform;
use App\Models\Geo\GeoQuestion;
use App\Models\Geo\GeoQuestionLog;
use App\Models\Geo\GeoQuestionResult;
use App\Models\Project\Project;
use App\Services\Geo\GeoService;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;

class GeoQuestionRes extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'geo_question_result';

    // Records the project_id being executed at the time.
    // NOTE(review): the name is misspelled ("porject") and the property is not
    // referenced anywhere in this class; it is public, so confirm there are no
    // external readers before renaming or removing it.
    public $porject_id;

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'geo设置请求获取结果';


    /**
     * Worker loop: repeatedly takes a task id from getTaskId(), locks it in
     * Redis, asks every enabled platform each of the task's questions, scores
     * the answers (keyword hits, URL hits, optional semantic comparison with
     * the expected result) and stores one result row plus one log row per
     * question/platform pair. Finally the task's next run date is pushed
     * forward by the project's geo_frequency (default 3 days).
     *
     * The loop never terminates on its own; the trailing return is only there
     * to honour the documented return type.
     *
     * @return bool
     */
    public function handle()
    {
        while (true) {
            $task_id = $this->getTaskId();
            if (empty($task_id)) {
                // Nothing queued and nothing could be generated: back off before polling again.
                sleep(300);
                continue;
            }
            $lock_key = "geo_task_lock_" . $task_id;
            if (!Redis::setnx($lock_key, 1)) {
                $this->output("任务 $task_id 已被其他进程锁定,跳过");
                // Pause so the last remaining task does not cause tight polling of the table and Redis.
                sleep(30);
                continue;
            }
            Redis::expire($lock_key, 600); // lock expires automatically after 10 minutes
            $this->output('执行的任务ID:' . $task_id);

            $geoQuestionModel = new GeoQuestion();
            $taskInfo = $geoQuestionModel->read(['id' => $task_id]);
            if ($taskInfo === false) {
                $this->output('当前任务详情为空!');
                continue;
            }
            $projectModel = new Project();
            $projectInfo = $projectModel->read(['id' => $taskInfo['project_id']], ['geo_status', 'geo_frequency']);
            if ($projectInfo === false) {
                // The owning project could not be loaded: close the task so it is not queued again.
                $this->output('未获取到项目详情!');
                $geoQuestionModel->edit(['status' => $geoQuestionModel::STATUS_CLOSE], ['id' => $task_id]);
                continue;
            }
            // A task needs a question and at least one thing to look for (keywords, urls or an expected result).
            $hasNoTarget = empty($taskInfo['keywords']) && empty($taskInfo['url']) && empty($taskInfo['expect_result']);
            if (empty($taskInfo['question']) || $hasNoTarget) {
                $this->output('task id: ' . $task_id . ', error: 任务数据缺失, continue!');
                $geoQuestionModel->edit(['status' => $geoQuestionModel::STATUS_CLOSE], ['id' => $task_id]);
                continue;
            }
            $geoPlatformModel = new GeoPlatform();
            $platformsArr = $geoPlatformModel->selectField(['status' => GeoPlatform::STATUS_ON], 'en_name');
            if (empty($platformsArr)) {
                $this->output('未设置AI模型!');
                continue;
            }

            // Resolve the type once so the "find existing row" lookup and the saved row always agree.
            $type = $taskInfo['type'] ?? $geoQuestionModel::TYPE_BRAND;

            // URLs to look for. Project 4533 additionally tracks every link registered for the
            // project. Resolved once per task: the lookup does not depend on question or platform,
            // and guarding the operands avoids a TypeError when the task itself has no url.
            $targetUrls = $taskInfo['url'] ?? [];
            if ($taskInfo['project_id'] == 4533) {
                $linkModel = new GeoLink();
                $projectLinks = $linkModel->selectField(['project_id' => $taskInfo['project_id']], 'url');
                $targetUrls = array_values(array_unique(array_merge(
                    is_array($targetUrls) ? $targetUrls : [],
                    is_array($projectLinks) ? $projectLinks : []
                )));
            }

            $geo_service = new GeoService();
            $geoResultModel = new GeoQuestionResult();
            $geoLogModel = new GeoQuestionLog();
            foreach ($taskInfo['question'] as $question) {
                // A single question may take 15-18 minutes, so extend the lock for each one.
                Redis::expire($lock_key, 1200);
                // NOTE(review): the target language passed here is 'zh' although the value is stored as en_question - confirm.
                $en_question = Translate::tran($question, 'zh') ?? '';
                $this->output('项目ID:' . $taskInfo['project_id'] . ', 问题 开始:' . $question);
                foreach ($platformsArr as $platform) {
                    $data = $hit_data = [];
                    $result = [];
                    // Some platforms do not answer reliably, so try up to three times.
                    for ($attempt = 1; $attempt <= 3; $attempt++) {
                        $this->output('平台:' . $platform . ', 执行次数:' . $attempt);
                        try {
                            switch ($platform) {
                                case 'google_ai_overview':
                                    // The overview payload has no fixed structure and is normalised separately.
                                    $data = $geo_service->getGooglePlatformResult($question);
                                    $result = $this->dealGoogleData($data);
                                    break;
                                case 'openai-not-network':
                                    $data = $geo_service->getChatResult($question, 'gpt-4o-mini');
                                    $result = $this->dealChatData($data, 'gpt-4o-mini');
                                    break;
                                default:
                                    $result = $geo_service->getAiPlatformResult($question, $platform);
                                    break;
                            }
                        } catch (\Exception $e) {
                            $this->output('task id:' . $task_id . ', question: ' . $question . ', platform: ' . $platform . ', error: ' . $e->getMessage());
                            continue;
                        }
                        if (!empty($result['text'])) {
                            break;
                        }
                    }
                    if (empty($result['text'])) {
                        $this->output('任务ID:' . $task_id . ', 项目ID:' . $taskInfo['project_id'] . ', 平台:' . $platform . ', 问题:' . $question . ', 获取失败.');
                        continue;
                    }

                    // Text that keywords and urls are matched against: cited urls, cited titles, then the answer.
                    $hit_data[] = $result['text'];
                    if (!empty($result['annotations'])) {
                        $citations = array_column($result['annotations'], 'url_citation');
                        $hit_data = array_merge(
                            array_column($citations, 'url'),
                            array_column($citations, 'title'),
                            $hit_data
                        );
                    }

                    $hit = 0;
                    $is_match = 0;
                    $cosine = 0;
                    $similarity = [];
                    // When an expected result exists, compare it with the answer.
                    if (!empty($taskInfo['expect_result'])) {
                        $cosine_result = $geo_service->cosineSimilarity($taskInfo['expect_result'], $result['text']);
                        // Semantic verdict: 1 = judged similar, 2 = any other verdict.
                        // NOTE(review): hit is incremented for either verdict - confirm this is intended.
                        if (!empty($cosine_result['judgement'])) {
                            $is_match = $cosine_result['judgement'] == '语义相近' ? 1 : 2;
                            $hit++;
                        }
                        // Cosine similarity expressed as a percentage with two decimals.
                        if (!empty($cosine_result['similarity'])) {
                            $cosine = intval($cosine_result['similarity'] * 10000) / 100;
                            // NOTE(review): both mt_rand bounds are equal, so scores strictly between
                            // 60 and 70 are always replaced by 90 - confirm the intended range.
                            if ($cosine > 60 && $cosine < 70) {
                                $cosine = mt_rand(90 * 100, 90 * 100) / 100;
                            }
                        }
                        // Per-sentence breakdown returned by the comparison.
                        if (!empty($cosine_result['split_results'])) {
                            $similarity = $cosine_result['split_results'];
                        }
                    }

                    $hit_keyword = $this->getKeywords($taskInfo['keywords'], $hit_data);
                    if (!empty($hit_keyword['keywords'])) {
                        $hit++;
                    }
                    $hit_url = $this->getUrl($targetUrls, $hit_data);
                    if (!empty($hit_url['url'])) {
                        $hit++;
                    }

                    // The original code passed `true` as the json_encode flags argument, which PHP
                    // coerces to 1 (JSON_HEX_TAG). The flag is spelled out so the stored format is unchanged.
                    $keyword_num = json_encode($hit_keyword['keywords_num'] ?? [], JSON_HEX_TAG);
                    $url_num = json_encode($hit_url['url_num'] ?? [], JSON_HEX_TAG);

                    // Insert or update the result row for this project/type/task/platform/question.
                    $geo_result = $geoResultModel->read([
                        'project_id' => $taskInfo['project_id'],
                        'type' => $type,
                        'question_id' => $task_id,
                        'platform' => $platform,
                        'question' => $question,
                    ], ['id']);
                    $now = date('Y-m-d H:i:s');
                    $save_data = [
                        'project_id' => $taskInfo['project_id'],
                        'question_id' => $task_id,
                        'type' => $type,
                        'platform' => $platform,
                        'question' => $question,
                        'en_question' => $en_question,
                        'keywords' => json_encode($hit_keyword['keywords'] ?? [], JSON_HEX_TAG), // matched keywords
                        'url' => json_encode($hit_url['url'] ?? [], JSON_HEX_TAG), // matched urls
                        'text' => json_encode($result, JSON_HEX_TAG),
                        'hit' => $hit,
                        'keywords_num' => $keyword_num,
                        'url_num' => $url_num,
                        'is_match' => $is_match,
                        'label' => $taskInfo['label'] ?? null,
                        'cosine' => $cosine,
                        'similarity' => json_encode($similarity, JSON_HEX_TAG),
                        'created_at' => $now,
                        'updated_at' => $now,
                    ];
                    if ($geo_result === false) {
                        $geoResultModel->insertGetId($save_data);
                    } else {
                        $geoResultModel->edit($save_data, ['id' => $geo_result['id']]);
                    }
                    // The log row keeps the raw platform payload when one was captured.
                    $save_data['text'] = json_encode(!empty($data) ? $data : $result, JSON_HEX_TAG);
                    $geoLogModel->addReturnId($save_data);
                    $this->output('平台:' . $platform . ' 完成');
                }
            }
            $next_time = date('Y-m-d', strtotime('+' . ($projectInfo['geo_frequency'] ?? 3) . ' days'));
            $geoQuestionModel->edit(['current_time' => date('Y-m-d'), 'next_time' => $next_time], ['id' => $task_id]);
        }
        return true;
    }

    /**
     * Find which of the given urls occur in the collected answer text.
     *
     * The text fragments are joined with "," and every url is counted as a
     * plain, case-sensitive substring. Empty or non-scalar entries are skipped:
     * substr_count() rejects an empty needle with a ValueError, and an empty
     * string would otherwise count as "contained" in any text.
     *
     * @param array $urlArr             urls to look for
     * @param array $result_annotations text fragments to search in
     * @return array ['url' => list of urls found at least once, 'url_num' => occurrence count keyed by url]
     */
    public function getUrl($urlArr = [], $result_annotations = []){
        $url = [];
        $url_num = [];
        if(!empty($urlArr)){
            $str = implode(',', (array)$result_annotations);
            foreach ($urlArr as $u_item){
                if (!is_scalar($u_item) || (string)$u_item === '') {
                    continue;
                }
                $needle = (string)$u_item;
                $count = substr_count($str, $needle);
                $url_num[$needle] = $count;
                if ($count > 0) {
                    $url[] = $u_item;
                }
            }
        }
        return ['url' => $url, 'url_num' => $url_num];
    }

    /**
     * Find which of the given keywords occur in the collected answer text.
     *
     * The text fragments are joined with "," and every keyword is counted as a
     * plain, case-sensitive substring. Nothing is reported when either list is
     * empty. Empty or non-scalar keywords are skipped: substr_count() rejects
     * an empty needle with a ValueError, and an empty string would otherwise
     * count as "contained" in any text.
     *
     * @param array $keywordArr  keywords to look for
     * @param array $result_text text fragments to search in
     * @return array ['keywords' => list of keywords found at least once, 'keywords_num' => occurrence count keyed by keyword]
     */
    public function getKeywords($keywordArr = [], $result_text = []){
        $keywords = [];
        $keywords_num = [];
        if(!empty($keywordArr) && !empty($result_text)){
            $str = implode(',', (array)$result_text);
            foreach ($keywordArr as $k_item){
                if (!is_scalar($k_item) || (string)$k_item === '') {
                    continue;
                }
                $needle = (string)$k_item;
                $count = substr_count($str, $needle);
                $keywords_num[$needle] = $count;
                if ($count > 0) {
                    $keywords[] = $k_item;
                }
            }
        }
        return ['keywords' => $keywords, 'keywords_num' => $keywords_num];
    }

    /**
     * Normalise a chat-completion payload into the common result shape.
     *
     * The reasoning content (when present) is placed before the answer text,
     * and the two are joined with PHP_EOL.
     *
     * @param mixed  $data  payload that may carry 'text' and 'reasoning_content'
     * @param string $model model name reported back in the result
     * @return array ['code' => 200, 'model' => $model, 'text' => joined text]
     */
    public function dealChatData($data, $model){
        $segments = [];
        foreach (['reasoning_content', 'text'] as $field) {
            if (!empty($data[$field])) {
                $segments[] = $data[$field];
            }
        }
        return [
            'code' => 200,
            'model' => $model,
            'text' => implode(PHP_EOL, $segments),
        ];
    }

    /**
     * Normalise a Google AI Overview payload into the common result shape.
     *
     * Text is collected from $data['data']['text_parts']: paragraph text is
     * appended, title text is moved to the front, and for list parts each
     * entry contributes its own text and then its own title. Reference links
     * that have both a title and a link become 'url_citation' annotations.
     *
     * @param mixed $data raw payload from the Google platform request
     * @return array ['code' => 200, 'model' => 'Google AI Overview', 'text' => joined text]
     *               plus an 'annotations' list when at least one usable reference link exists
     */
    public function dealGoogleData($data)
    {
        $texts = [];
        if(!empty($data['data']['text_parts']) && is_array($data['data']['text_parts'])){
            foreach ($data['data']['text_parts'] as $item){
                if (!is_array($item)) {
                    continue;
                }
                // Parts without a type are ignored instead of raising an undefined-index warning.
                switch ($item['type'] ?? ''){
                    case 'paragraph':
                        if(!empty($item['text'])){
                            $texts[] = $item['text'];
                        }
                        break;
                    case 'title':
                        if(!empty($item['text'])) {
                            array_unshift($texts, $item['text']);
                        }
                        break;
                    case 'list':
                        if(!empty($item['list']) && is_array($item['list'])){
                            foreach ($item['list'] as $sonItem){
                                if(!empty($sonItem['text'])) {
                                    $texts[] = $sonItem['text'];
                                }
                                // Test the entry's own title: the previous check looked at the parent
                                // part's title but then read the entry's, which could append null.
                                if(!empty($sonItem['title'])) {
                                    $texts[] = $sonItem['title'];
                                }
                            }
                        }
                        break;
                    default:
                        break;
                }
            }
        }

        $annotations = [];
        if(!empty($data['data']['reference_links']) && is_array($data['data']['reference_links'])){
            foreach ($data['data']['reference_links'] as $link) {
                if (!empty($link['title']) && !empty($link['link'])) {
                    $annotations[] = [
                        'type' => 'url_citation',
                        'url_citation' => [
                            'url' => $link['link'],
                            'title' => $link['title']
                        ],
                    ];
                }
            }
        }

        $result = [
            'code' => 200,
            'model' => 'Google AI Overview',
            'text' => implode(PHP_EOL, $texts),
        ];
        // The key is only present when there is something to cite.
        if (!empty($annotations)) {
            $result['annotations'] = $annotations;
        }
        return $result;
    }

    /**
     * Get the id of the next task to execute.
     *
     * Pops from the right of the Redis list 'geo_task_list'. When the list is
     * empty, one worker at a time refills it: the generation lock is taken
     * with a single SET ... EX ... NX so that checking and taking the lock
     * cannot interleave between processes. Every open question whose
     * next_time is today or earlier (or not set) is pushed, ordered by
     * next_time ascending, so the earliest due task is popped first.
     *
     * @return mixed the task id, or the empty value returned by Redis when nothing is available
     */
    public function getTaskId(){
        $key = 'geo_task_list';
        $task_id = Redis::rpop($key);
        if (!empty($task_id)) {
            return $task_id;
        }

        $lock_key = 'geo_task_generation_lock';
        $lock_ttl = 60; // must be longer than the time the refill below takes
        // Non-blocking, atomic acquire: falsy when another process already holds the lock.
        if (!Redis::set($lock_key, 1, 'EX', $lock_ttl, 'NX')) {
            return $task_id;
        }

        // The status / next_time filter already selects exactly the due tasks, so no
        // preliminary project_id query is needed.
        $ids = GeoQuestion::where('status', GeoQuestion::STATUS_OPEN)
            ->where(function ($query){
                $query->where('next_time', '<=', date('Y-m-d'))
                    ->orWhereNull('next_time');
            })
            ->orderBy('next_time', 'asc')
            ->pluck('id');
        if ($ids->isEmpty()) {
            return $task_id;
        }
        foreach ($ids as $id) {
            Redis::lpush($key, $id);
        }
        return Redis::rpop($key);
    }


    /**
     * Print a log line to standard output, prefixed with the current
     * date and time and terminated with PHP_EOL.
     *
     * @param mixed $message text to print
     * @return bool always true
     */
    public function output($message)
    {
        $timestamp = date('Y-m-d H:i:s');
        echo "{$timestamp} {$message}" . PHP_EOL;
        return true;
    }
}