正在显示
1 个修改的文件
包含
16 行增加
和
18 行删除
| @@ -135,6 +135,9 @@ class TranslateLogic extends BaseLogic | @@ -135,6 +135,9 @@ class TranslateLogic extends BaseLogic | ||
| 135 | ]; | 135 | ]; |
| 136 | $context = stream_context_create($contextOptions); | 136 | $context = stream_context_create($contextOptions); |
| 137 | $sourceCode = file_get_contents($url, false, $context); | 137 | $sourceCode = file_get_contents($url, false, $context); |
| 138 | + // 过滤掉具有 "change-language-cont" 类的元素 | ||
| 139 | + $pattern = '/<div\b[^>]*\sclass=[\'"]([^\'"]*change-language-cont[^\'"]*)[\'"][^>]*>(.*?)<\/div>/is'; | ||
| 140 | + $sourceCode = preg_replace($pattern, '', $sourceCode); | ||
| 138 | $pattern = '/<style\b[^>]*>(.*?)<\/style>/s'; // 定义匹配`<style>`标签及其内容的正则表达式 | 141 | $pattern = '/<style\b[^>]*>(.*?)<\/style>/s'; // 定义匹配`<style>`标签及其内容的正则表达式 |
| 139 | $strippedContent = preg_replace($pattern, '', $sourceCode); // 删除`<style>`标签及其内容 | 142 | $strippedContent = preg_replace($pattern, '', $sourceCode); // 删除`<style>`标签及其内容 |
| 140 | $pattern = '/<script\b[^>]*>(.*?)<\/script>/s'; // 定义匹配`<script>`标签及其内容的正则表达式 | 143 | $pattern = '/<script\b[^>]*>(.*?)<\/script>/s'; // 定义匹配`<script>`标签及其内容的正则表达式 |
| @@ -147,30 +150,25 @@ class TranslateLogic extends BaseLogic | @@ -147,30 +150,25 @@ class TranslateLogic extends BaseLogic | ||
| 147 | $textContentArray = array_filter($matches[1], function($item) { | 150 | $textContentArray = array_filter($matches[1], function($item) { |
| 148 | return !empty(trim($item)); | 151 | return !empty(trim($item)); |
| 149 | }); | 152 | }); |
| 150 | - $data = []; | 153 | + $contentData = []; |
| 151 | foreach ($textContentArray as $v){ | 154 | foreach ($textContentArray as $v){ |
| 152 | $content = trim($v); | 155 | $content = trim($v); |
| 153 | $trimmedString = preg_replace('/\s+/', ' ', $content); | 156 | $trimmedString = preg_replace('/\s+/', ' ', $content); |
| 154 | - $data[] = $trimmedString; | ||
| 155 | - } | ||
| 156 | - $data = array_values($data); | 157 | + $contentData[] = $trimmedString; |
| 158 | + } | ||
| 159 | + $contentData = array_values($contentData); | ||
| 160 | + $pattern = '/<meta\s+[^>]*name=[\'"]([^\'"]+)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 meta 标签的正则表达式 | ||
| 161 | + $matches1 = array(); | ||
| 162 | + preg_match_all($pattern, $strippedContent, $matches1); | ||
| 163 | + $metaData = array(); | ||
| 164 | + foreach ($matches1[1] as $index) { | ||
| 165 | + $content = $matches1[2][$index]; | ||
| 166 | + $metaData[] = $content; | ||
| 167 | + } | ||
| 168 | + $data = array_merge($metaData, $contentData); | ||
| 157 | return $data; | 169 | return $data; |
| 158 | } | 170 | } |
| 159 | 171 | ||
| 160 | - /** | ||
| 161 | - * 翻译校对 dom | ||
| 162 | - */ | ||
| 163 | - public function proofreadPhpQuery($url) | ||
| 164 | - { | ||
| 165 | - $html = file_get_contents($url); | ||
| 166 | - $dom = \phpQuery::newDocument($html); | ||
| 167 | - $dom->find('.change-language')->remove(); | ||
| 168 | - $dom->find('script')->remove(); | ||
| 169 | - $dom->find('style')->remove(); | ||
| 170 | - $test = $dom->text(); | ||
| 171 | - file_put_contents($url, $test); | ||
| 172 | - $this->success($test); | ||
| 173 | - } | ||
| 174 | 172 | ||
| 175 | /** | 173 | /** |
| 176 | * @remark :获取Url内容 | 174 | * @remark :获取Url内容 |
-
请 注册 或 登录 后发表评论