Merge pull request #259 from eai04191/feature/resolver-nijie

NijieResolver 修正など
This commit is contained in:
shibafu 2019-08-31 03:12:34 +09:00 committed by GitHub
commit ac2077af49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 409 additions and 130 deletions

View File

@ -3,6 +3,7 @@
namespace App\MetadataResolver; namespace App\MetadataResolver;
use GuzzleHttp\Client; use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class NijieResolver implements Resolver class NijieResolver implements Resolver
{ {
@ -30,23 +31,29 @@ class NijieResolver implements Resolver
$url = preg_replace('~view_popup\.php~', 'view.php', $url); $url = preg_replace('~view_popup\.php~', 'view.php', $url);
} }
$client = $this->client; $res = $this->client->get($url);
$res = $client->get($url);
if ($res->getStatusCode() === 200) { if ($res->getStatusCode() === 200) {
$metadata = $this->ogpResolver->parse($res->getBody()); $html = (string) $res->getBody();
$metadata = $this->ogpResolver->parse($html);
$crawler = new Crawler($html);
$dom = new \DOMDocument(); // DomCrawler内でjson内の日本語がHTMLエンティティに変換されるのでhtml_entity_decode
@$dom->loadHTML(mb_convert_encoding($res->getBody(), 'HTML-ENTITIES', 'UTF-8')); $json = html_entity_decode($crawler->filter('script[type="application/ld+json"]')->first()->text());
$xpath = new \DOMXPath($dom);
$dataNode = $xpath->query('//script[substring(@type, string-length(@type) - 3, 4) = "json"]'); // 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
foreach ($dataNode as $node) { $data = json_decode(preg_replace('/\r?\n/', '\n', $json), true);
// 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
$imageData = json_decode(preg_replace('/\r?\n/', '\n', $node->nodeValue), true); $metadata->title = $data['name'];
if (isset($imageData['thumbnailUrl']) && !ends_with($imageData['thumbnailUrl'], '.gif') && !ends_with($imageData['thumbnailUrl'], '.mp4')) { $metadata->description = '投稿者: ' . $data['author']['name'] . PHP_EOL . $data['description'];
$metadata->image = preg_replace('~nijie\\.info/.*/nijie_picture/~', 'nijie.info/nijie_picture/', $imageData['thumbnailUrl']); if (
break; isset($data['thumbnailUrl']) &&
} !ends_with($data['thumbnailUrl'], '.gif') &&
!ends_with($data['thumbnailUrl'], '.mp4')
) {
// サムネイルからメイン画像に
$metadata->image = str_replace('__rs_l160x160/', '', $data['thumbnailUrl']);
} }
$metadata->tags = $crawler->filter('#view-tag span.tag_name')->extract('_text');
return $metadata; return $metadata;
} else { } else {

View File

@ -25,10 +25,10 @@ class NijieResolverTest extends TestCase
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view.php?id=66384'); $metadata = $this->resolver->resolve('https://nijie.info/view.php?id=66384');
$this->assertEquals('チンポップくんの日常ep.1「チンポップくんと釣り」 | ニジエ運営', $metadata->title); $this->assertSame('チンポップくんの日常ep.1「チンポップくんと釣り」', $metadata->title);
$this->assertEquals("メールマガジン漫画のバックナンバー第一話です!\r\n最新話はメールマガジンより配信中です。", $metadata->description); $this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'メールマガジン漫画のバックナンバー第一話です!' . PHP_EOL . '最新話はメールマガジンより配信中です。', $metadata->description);
$this->assertRegExp('/pic\d+\.nijie\.info/', $metadata->image); $this->assertSame('https://pic.nijie.net/04/nijie_picture/38_20131130155623.png', $metadata->image);
$this->assertNotRegExp('~/diff/main/~', $metadata->image); $this->assertSame(['ニジエたん', '釣り', 'チンポップ君の日常', '公式漫画'], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri());
} }
@ -41,10 +41,10 @@ class NijieResolverTest extends TestCase
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view.php?id=202707'); $metadata = $this->resolver->resolve('https://nijie.info/view.php?id=202707');
$this->assertEquals('ニジエ壁紙 | ニジエ運営', $metadata->title); $this->assertSame('ニジエ壁紙', $metadata->title);
$this->assertEquals("ニジエのPCとiphone用(4.7inch推奨)の壁紙です。\r\n保存してご自由にお使いくださいませ。", $metadata->description); $this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'ニジエのPCとiphone用(4.7inch推奨)の壁紙です。' . PHP_EOL . '保存してご自由にお使いくださいませ。', $metadata->description);
$this->assertRegExp('/pic\d+\.nijie\.info/', $metadata->image); $this->assertSame('https://pic.nijie.net/03/nijie_picture/38_20170209185801_0.png', $metadata->image);
$this->assertNotRegExp('~/diff/main/~', $metadata->image); $this->assertSame(['ニジエたん', '壁紙'], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=202707', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=202707', (string) $this->handler->getLastRequest()->getUri());
} }
@ -57,9 +57,10 @@ class NijieResolverTest extends TestCase
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view.php?id=9537'); $metadata = $this->resolver->resolve('https://nijie.info/view.php?id=9537');
$this->assertEquals('ニジエがgifに対応したんだってね 奥さん | 黒末アプコ', $metadata->title); $this->assertSame('ニジエがgifに対応したんだってね 奥さん', $metadata->title);
$this->assertEquals('アニメgifとか専門外なのでよくわかりませんでした', $metadata->description); $this->assertSame('投稿者: 黒末アプコ' . PHP_EOL . 'アニメgifとか専門外なのでよくわかりませんでした', $metadata->description);
$this->assertRegExp('~/nijie\.info/pic/logo~', $metadata->image); $this->assertStringStartsWith('https://nijie.info/pic/logo/nijie_logo_og.png', $metadata->image);
$this->assertSame(['おっぱい', '陥没乳首', '眼鏡', 'GIFアニメ', 'ぶるんぶるん', 'アニメgif'], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=9537', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=9537', (string) $this->handler->getLastRequest()->getUri());
} }
@ -72,74 +73,60 @@ class NijieResolverTest extends TestCase
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view.php?id=256283'); $metadata = $this->resolver->resolve('https://nijie.info/view.php?id=256283');
$this->assertEquals('てすと | ニジエ運営', $metadata->title); $this->assertSame('てすと', $metadata->title);
$this->assertEquals("H264動画てすと あとで消します\r\n\r\n今の所、H264コーデックのみ、出力時に音声なしにしないと投稿できません\r\n動画は勝手にループします", $metadata->description); $this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'H264動画てすと あとで消します' . PHP_EOL . PHP_EOL . '今の所、H264コーデックのみ、出力時に音声なしにしないと投稿できません' . PHP_EOL . '動画は勝手にループします', $metadata->description);
$this->assertRegExp('~/nijie\.info/pic/logo~', $metadata->image); $this->assertStringStartsWith('https://nijie.info/pic/logo/nijie_logo_og.png', $metadata->image);
$this->assertSame([], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=256283', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=256283', (string) $this->handler->getLastRequest()->getUri());
} }
} }
public function testStandardPictureSp() public function testViewPopup()
{
$responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testStandardPictureResponse.html');
$this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view_popup.php?id=66384');
$this->assertSame('チンポップくんの日常ep.1「チンポップくんと釣り」', $metadata->title);
$this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'メールマガジン漫画のバックナンバー第一話です!' . PHP_EOL . '最新話はメールマガジンより配信中です。', $metadata->description);
$this->assertSame('https://pic.nijie.net/04/nijie_picture/38_20131130155623.png', $metadata->image);
$this->assertSame(['ニジエたん', '釣り', 'チンポップ君の日常', '公式漫画'], $metadata->tags);
if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri());
}
}
public function testSp()
{ {
$responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testStandardPictureResponse.html'); $responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testStandardPictureResponse.html');
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://sp.nijie.info/view.php?id=66384'); $metadata = $this->resolver->resolve('https://sp.nijie.info/view.php?id=66384');
$this->assertEquals('チンポップくんの日常ep.1「チンポップくんと釣り」 | ニジエ運営', $metadata->title); $this->assertSame('チンポップくんの日常ep.1「チンポップくんと釣り」', $metadata->title);
$this->assertEquals("メールマガジン漫画のバックナンバー第一話です!\r\n最新話はメールマガジンより配信中です。", $metadata->description); $this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'メールマガジン漫画のバックナンバー第一話です!' . PHP_EOL . '最新話はメールマガジンより配信中です。', $metadata->description);
$this->assertRegExp('/pic\d+\.nijie\.info/', $metadata->image); $this->assertSame('https://pic.nijie.net/04/nijie_picture/38_20131130155623.png', $metadata->image);
$this->assertNotRegExp('~/diff/main/~', $metadata->image); $this->assertSame(['ニジエたん', '釣り', 'チンポップ君の日常', '公式漫画'], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri());
} }
} }
public function testMultiplePictureSp() public function testSpViewPopup()
{ {
$responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testMultiplePictureResponse.html'); $responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testStandardPictureResponse.html');
$this->createResolver(NijieResolver::class, $responseText); $this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://sp.nijie.info/view.php?id=202707'); $metadata = $this->resolver->resolve('https://sp.nijie.info/view_popup.php?id=66384');
$this->assertEquals('ニジエ壁紙 | ニジエ運営', $metadata->title); $this->assertSame('チンポップくんの日常ep.1「チンポップくんと釣り」', $metadata->title);
$this->assertEquals("ニジエのPCとiphone用(4.7inch推奨)の壁紙です。\r\n保存してご自由にお使いくださいませ。", $metadata->description); $this->assertSame('投稿者: ニジエ運営' . PHP_EOL . 'メールマガジン漫画のバックナンバー第一話です!' . PHP_EOL . '最新話はメールマガジンより配信中です。', $metadata->description);
$this->assertRegExp('/pic\d+\.nijie\.info/', $metadata->image); $this->assertSame('https://pic.nijie.net/04/nijie_picture/38_20131130155623.png', $metadata->image);
$this->assertNotRegExp('~/diff/main/~', $metadata->image); $this->assertSame(['ニジエたん', '釣り', 'チンポップ君の日常', '公式漫画'], $metadata->tags);
if ($this->shouldUseMock()) { if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=202707', (string) $this->handler->getLastRequest()->getUri()); $this->assertSame('https://nijie.info/view.php?id=66384', (string) $this->handler->getLastRequest()->getUri());
}
}
public function testAnimationGifSp()
{
$responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testAnimationGifResponse.html');
$this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://nijie.info/view.php?id=9537');
$this->assertEquals('ニジエがgifに対応したんだってね 奥さん | 黒末アプコ', $metadata->title);
$this->assertEquals('アニメgifとか専門外なのでよくわかりませんでした', $metadata->description);
$this->assertRegExp('~/nijie\.info/pic/logo~', $metadata->image);
if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=9537', (string) $this->handler->getLastRequest()->getUri());
}
}
public function testMp4MovieSp()
{
$responseText = file_get_contents(__DIR__ . '/../../fixture/Nijie/testMp4MovieResponse.html');
$this->createResolver(NijieResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://sp.nijie.info/view.php?id=256283');
$this->assertEquals('てすと | ニジエ運営', $metadata->title);
$this->assertEquals("H264動画てすと あとで消します\r\n\r\n今の所、H264コーデックのみ、出力時に音声なしにしないと投稿できません\r\n動画は勝手にループします", $metadata->description);
$this->assertRegExp('~/nijie\.info/pic/logo~', $metadata->image);
if ($this->shouldUseMock()) {
$this->assertSame('https://nijie.info/view.php?id=256283', (string) $this->handler->getLastRequest()->getUri());
} }
} }
} }

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long