作者 lyh

gx

... ... @@ -135,6 +135,9 @@ class TranslateLogic extends BaseLogic
];
$context = stream_context_create($contextOptions);
$sourceCode = file_get_contents($url, false, $context);
// 过滤掉具有 "change-language-cont" 类的元素
$pattern = '/<div\b[^>]*\sclass=[\'"]([^\'"]*change-language-cont[^\'"]*)[\'"][^>]*>(.*?)<\/div>/is';
$sourceCode = preg_replace($pattern, '', $sourceCode);
$pattern = '/<style\b[^>]*>(.*?)<\/style>/s'; // 定义匹配`<style>`标签及其内容的正则表达式
$strippedContent = preg_replace($pattern, '', $sourceCode); // 删除`<style>`标签及其内容
$pattern = '/<script\b[^>]*>(.*?)<\/script>/s'; // 定义匹配`<script>`标签及其内容的正则表达式
... ... @@ -147,30 +150,25 @@ class TranslateLogic extends BaseLogic
$textContentArray = array_filter($matches[1], function($item) {
return !empty(trim($item));
});
$data = [];
$contentData = [];
foreach ($textContentArray as $v){
$content = trim($v);
$trimmedString = preg_replace('/\s+/', ' ', $content);
$data[] = $trimmedString;
$contentData[] = $trimmedString;
}
$contentData = array_values($contentData);
$pattern = '/<meta\s+[^>]*name=[\'"]([^\'"]+)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 meta 标签的正则表达式
$matches1 = array();
preg_match_all($pattern, $strippedContent, $matches1);
$metaData = array();
foreach ($matches1[1] as $index) {
$content = $matches1[2][$index];
$metaData[] = $content;
}
$data = array_values($data);
$data = array_merge($metaData, $contentData);
return $data;
}
/**
 * Translation proofreading helper (DOM-based).
 *
 * Loads the markup found at $url into a phpQuery document, removes every
 * element matching `.change-language`, `script` and `style`, and passes the
 * remaining text content to $this->success(). The extracted text is also
 * written back to $url.
 *
 * @param string $url Location handed to file_get_contents() and file_put_contents().
 */
public function proofreadPhpQuery($url)
{
    $document = \phpQuery::newDocument(file_get_contents($url));

    // Drop elements carrying the change-language class, then scripts and styles,
    // so only the remaining text nodes contribute to the extracted text.
    foreach (['.change-language', 'script', 'style'] as $selector) {
        $document->find($selector)->remove();
    }

    $plainText = $document->text();

    // NOTE(review): this overwrites the same location the markup was read from
    // with the stripped text — confirm this is intended and not leftover debugging.
    file_put_contents($url, $plainText);
    $this->success($plainText);
}
/**
* @remark :获取Url内容
... ...