Merge pull request #289 from shikorism/fix/nijie-unescape-after-json-decode

NijieResolver: JSONデコード後にHTMLエンティティのデコードを行う
This commit is contained in:
shibafu 2019-10-04 00:32:57 +09:00 committed by GitHub
commit c7e261d06b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 139 additions and 2 deletions

View File

@ -36,12 +36,16 @@ class NijieResolver implements Resolver
$metadata = $this->ogpResolver->parse($html); $metadata = $this->ogpResolver->parse($html);
$crawler = new Crawler($html); $crawler = new Crawler($html);
// DomCrawler内でjson内の日本語がHTMLエンティティに変換されるのでhtml_entity_decode $json = $crawler->filter('script[type="application/ld+json"]')->first()->text();
$json = html_entity_decode($crawler->filter('script[type="application/ld+json"]')->first()->text());
// 改行がそのまま入っていることがあるのでデコード前にエスケープが必要 // 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
$data = json_decode(preg_replace('/\r?\n/', '\n', $json), true); $data = json_decode(preg_replace('/\r?\n/', '\n', $json), true);
// DomCrawler内でjson内の日本語がHTMLエンティティに変換されるので、全要素に対してhtml_entity_decode
array_walk_recursive($data, function (&$v) {
$v = html_entity_decode($v);
});
$metadata->title = $data['name']; $metadata->title = $data['name'];
$metadata->description = '投稿者: ' . $data['author']['name'] . PHP_EOL . $data['description']; $metadata->description = '投稿者: ' . $data['author']['name'] . PHP_EOL . $data['description'];
if ( if (

View File

@ -129,4 +129,23 @@ class NijieResolverTest extends TestCase
$this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri());
} }
} }
/**
 * Resolves the Nijie work page for id 285698 from its saved fixture and
 * verifies the title, description, image and tags of the resulting metadata.
 *
 * NOTE(review): per the test name, the fixture presumably contains HTML in
 * the author's profile text; confirm against the fixture file.
 */
public function testHasHtmlInAuthorProfile()
{
    $fixturePath = __DIR__ . '/../../fixture/Nijie/testHasHtmlInAuthorProfileResponse.html';
    $this->createResolver(NijieResolver::class, file_get_contents($fixturePath));

    $metadata = $this->resolver->resolve('https://nijie.info/view.php?id=285698');

    // Expected description: the author line followed by the description lines,
    // joined with the platform line separator.
    $expectedDescription = implode(PHP_EOL, [
        '投稿者: ままままま',
        'https://www.pixiv.net/fanbox/creator/32045169',
        'ピクシブのファンボックスでこっちに上げてた一次創作のノリでえっちなやつ描いてます',
        '二次創作のえっちなやつは相変わらずこっち',
        '健全目なのはついったー',
    ]);

    $this->assertSame('JK文化祭コスプレ喫茶', $metadata->title);
    $this->assertSame($expectedDescription, $metadata->description);
    $this->assertSame('https://pic.nijie.net/02/nijie_picture/540086_20181028112046_0.png', $metadata->image);
    $this->assertSame(['バニーガール'], $metadata->tags);

    // The request URI can only be inspected when the mock handler is in use.
    if (!$this->shouldUseMock()) {
        return;
    }

    $requestedUri = (string) $this->handler->getLastRequest()->getUri();
    $this->assertSame('https://nijie.info/view.php?id=285698', $requestedUri);
}
} }

File diff suppressed because one or more lines are too long