Merge pull request #289 from shikorism/fix/nijie-unescape-after-json-decode

NijieResolver: JSONデコード後にHTMLエンティティのデコードを行う
This commit is contained in:
shibafu
2019-10-04 00:32:57 +09:00
committed by GitHub
3 changed files with 139 additions and 2 deletions

View File

@@ -36,12 +36,16 @@ class NijieResolver implements Resolver
$metadata = $this->ogpResolver->parse($html);
$crawler = new Crawler($html);
// DomCrawler内でjson内の日本語がHTMLエンティティに変換されるのでhtml_entity_decode
$json = html_entity_decode($crawler->filter('script[type="application/ld+json"]')->first()->text());
$json = $crawler->filter('script[type="application/ld+json"]')->first()->text();
// 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
$data = json_decode(preg_replace('/\r?\n/', '\n', $json), true);
// DomCrawler内でjson内の日本語がHTMLエンティティに変換されるので、全要素に対してhtml_entity_decode
array_walk_recursive($data, function (&$v) {
$v = html_entity_decode($v);
});
$metadata->title = $data['name'];
$metadata->description = '投稿者: ' . $data['author']['name'] . PHP_EOL . $data['description'];
if (