データの取得方法をAPIからスクレイピングに変更

より多くのタグと高画質なサムネイルを取得するように変更
This commit is contained in:
eai04191 2019-09-09 13:51:07 +09:00
parent eecace33bd
commit 0f530099b4
4 changed files with 2690 additions and 12 deletions

View File

@ -3,6 +3,7 @@
namespace App\MetadataResolver;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class XtubeResolver implements Resolver
{
@ -18,20 +19,24 @@ class XtubeResolver implements Resolver
public function resolve(string $url): Metadata
{
if (preg_match('~www\.xtube\.com/video-watch/.*-(\d+)$~', $url, $matches) !== 1) {
if (preg_match('~www\.xtube\.com/video-watch/.*-(\d+)$~', $url) !== 1) {
throw new \RuntimeException("Unmatched URL Pattern: $url");
}
$videoid = $matches[1];
$res = $this->client->get('https://www.xtube.com/webmaster/api/getvideobyid?video_id=' . $videoid);
$res = $this->client->get($url);
if ($res->getStatusCode() === 200) {
$data = json_decode($res->getBody()->getContents(), true);
$html = (string) $res->getBody();
$metadata = new Metadata();
$crawler = new Crawler($html);
$metadata->title = $data['title'] ?? '';
$metadata->description = strip_tags(str_replace('\n', PHP_EOL, html_entity_decode($data['description'] ?? '')));
$metadata->image = str_replace('eSuQ8f', 'eSK08f', $data['thumb'] ?? ''); // 300x169 to 300x210
$metadata->tags = array_values(array_unique($data['tags']));
// poster URL抽出
$playerConfig = explode("\n", trim($crawler->filter('#playerWrapper script')->last()->text()));
preg_match('~https:\\\/\\\/cdn\d+-s-hw-e5\.xtube\.com\\\/m=(?P<size>.{8})\\\/videos\\\/\d{6}\\\/\d{2}\\\/.{5}-.{4}-\\\/original\\\/\d+\.jpg~', $playerConfig[0], $matches);
$metadata->image = str_replace('\/', '/', $matches[0]);
$metadata->title = trim($crawler->filter('.underPlayerRateForm h1')->text(''));
$metadata->description = trim($crawler->filter('.fullDescription ')->text(''));
$metadata->tags = $crawler->filter('.tagsCategories a')->extract('_text');
return $metadata;
} else {

View File

@ -20,15 +20,18 @@ class XtubeResolverTest extends TestCase
/**
 * Resolves a fixed Xtube video-watch URL and checks the title, description,
 * image URL and tags of the returned metadata.
 *
 * NOTE(review): this span reads like a rendered diff hunk with its +/- markers
 * stripped, so the pre-change and post-change versions of several statements
 * appear back to back. The comments below point out which pairs look like
 * "old line / new line"; confirm against the real file before relying on them.
 */
public function test()
{
// Fixture load. The first assignment reads test.json and is immediately
// overwritten by the second, which reads video.html; only the HTML fixture
// reaches createResolver(). Presumably the .json line is the removed side of
// the diff (the same page lists test.json as deleted) — TODO confirm.
$responseText = file_get_contents(__DIR__ . '/../../fixture/Xtube/test.json');
$responseText = file_get_contents(__DIR__ . '/../../fixture/Xtube/video.html');
// createResolver() is defined outside this view; presumably it builds
// $this->resolver so that it answers with $responseText — verify in TestCase.
$this->createResolver(XtubeResolver::class, $responseText);
$metadata = $this->resolver->resolve('https://www.xtube.com/video-watch/homegrown-big-tits-18634762');
$this->assertEquals('Homegrown Big Tits', $metadata->title);
$this->assertEquals('Dedicated to the fans of the beautiful amateur women with big natural tits. All user submitted - you can see big boob amateur hotties fucking and sucking as their tits bounce and sway.', $metadata->description);
// Image/tag expectations appear twice with different values: first a
// "m=eSK08f ... /240X180/1.jpg" thumbnail with two tags, then a
// "m=eaAaaEFb ... /original/1.jpg" poster with five tags. The two tag
// assertions cannot both pass for one result, so the first pair is presumably
// the removed side of the diff and the second pair the added side.
$this->assertRegExp('~https://cdn\d+-s-hw-e5\.xtube\.com/m=eSK08f/videos/201302/07/RF4Nk-S774-/240X180/1\.jpg~', $metadata->image);
$this->assertEquals(['bigtits', 'homeg'], $metadata->tags);
$this->assertRegExp('~https://cdn\d+-s-hw-e5\.xtube\.com/m=eaAaaEFb/videos/201302/07/RF4Nk-S774-/original/1\.jpg~', $metadata->image);
$this->assertEquals(['Amateur', 'Blowjob', 'Big Boobs', 'bigtits', 'homeg'], $metadata->tags);
// Only when shouldUseMock() is true: check that the last request recorded by
// the handler went to the video page URL itself (the same URL passed to
// resolve()), i.e. the page is fetched directly.
if ($this->shouldUseMock()) {
$this->assertSame('https://www.xtube.com/video-watch/homegrown-big-tits-18634762', (string) $this->handler->getLastRequest()->getUri());
}
}
public function testNotMatch()

View File

@ -1 +0,0 @@
{"duration":"180","views":3146,"video_id":"RF4Nk-S774-","rating":"4.000","ratings":"1","title":"Homegrown Big Tits","description":"Dedicated to the fans of the beautiful amateur women with big natural tits. All user submitted - you can see big boob amateur hotties fucking and sucking as their tits bounce and sway.","url":"https:\/\/www.xtube.com\/video-watch\/homegrown-big-tits-18634762","embedCode":"https:\/\/www.xtube.com\/video-watch\/embedded\/homegrown-big-tits-18634762","default_thumb":"https:\/\/cdn5-s-hw-e5.xtube.com\/m=eSuQ8f\/videos\/201302\/07\/RF4Nk-S774-\/240X180\/1.jpg","thumb":"https:\/\/cdn5-s-hw-e5.xtube.com\/m=eSuQ8f\/videos\/201302\/07\/RF4Nk-S774-\/240X180\/1.jpg","publish_date":"2013-02-07 17:41:10","tags":{"1396":"bigtits","472012":"homeg"},"thumbs":[{"width":300,"height":210,"src":"https:\/\/cdn4-s-hw-e5.xtube.com\/m=eSK08f\/videos\/201302\/07\/RF4Nk-S774-\/240X180\/1.jpg"},{"width":300,"height":210,"src":"https:\/\/cdn4-s-hw-e5.xtube.com\/m=eSK08f\/videos\/201302\/07\/RF4Nk-S774-\/240X180\/2.jpg"},{"width":300,"height":210,"src":"https:\/\/cdn10-s-hw-e5.xtube.com\/m=eSK08f\/videos\/201302\/07\/RF4Nk-S774-\/240X180\/3.jpg"}]}

2671
tests/fixture/Xtube/video.html vendored Normal file

File diff suppressed because one or more lines are too long