tissue/app/MetadataResolver/IwaraResolver.php

68 lines
2.5 KiB
PHP
Raw Normal View History

2018-11-20 23:31:57 +09:00
<?php
namespace App\MetadataResolver;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
2018-11-20 23:31:57 +09:00
/**
 * Resolves page metadata (title, description, tags and a thumbnail image)
 * by fetching a URL and scraping the returned HTML.
 */
class IwaraResolver implements Resolver
{
    /**
     * @var Client HTTP client used to fetch the target page.
     */
    private $client;

    /**
     * @param Client $client HTTP client used to fetch the target page.
     */
    public function __construct(Client $client)
    {
        $this->client = $client;
    }

    /**
     * Fetch $url and build a Metadata object from the page contents.
     *
     * The title and the author name are read unconditionally; if either element
     * is missing, the Crawler call is expected to throw (behavior depends on the
     * installed symfony/dom-crawler version).
     *
     * @param string $url URL of the page to resolve.
     * @return Metadata Populated with title, description, tags and, when found, image.
     * @throws \RuntimeException When the response status code is not 200.
     */
    public function resolve(string $url): Metadata
    {
        $res = $this->client->get($url);
        if ($res->getStatusCode() !== 200) {
            throw new \RuntimeException("{$res->getStatusCode()}: $url");
        }

        $crawler = new Crawler((string) $res->getBody());

        // The info block is the sibling <div> that directly follows the media
        // element (video player, external video URL field, or image field).
        $info = $crawler->filter(
            '#video-player + div, .field-name-field-video-url + div, .field-name-field-images + div'
        );

        $title = $info->filter('h1.title')->text();
        $author = $info->filter('.username')->text();

        // The body text is optional; fall back to an empty description.
        $body = $info->filter('.field-type-text-with-summary');
        $description = $body->count() ? $body->text() : '';

        // Passing an array keeps this call valid on DomCrawler versions that
        // no longer accept a bare string attribute name.
        $tags = $info
            ->filter('a[href^="/video-categories"], a[href^="/images"]')
            ->extract(['_text']);
        // Remove tags that carry no useful information.
        $tags = array_values(array_diff($tags, ['Uncategorized', 'Other']));
        // The author name is registered as a tag as well.
        $tags[] = $author;

        $metadata = new Metadata();
        $metadata->title = $title;
        $metadata->description = '投稿者: ' . $author . PHP_EOL . $description;
        $metadata->tags = $tags;

        // The three image sources are checked in a fixed order and a later
        // match overrides an earlier one: hosted video, YouTube embed, images.

        // Hosted video: use the player's poster image.
        $player = $crawler->filter('#video-player');
        if ($player->count()) {
            $poster = $this->normalizeImageUrl($player->attr('poster'));
            if ($poster !== null) {
                $metadata->image = $poster;
            }
        }

        // YouTube embed: derive the thumbnail URL from the video ID.
        $youtube = $crawler->filter('iframe[src^="//www.youtube.com"]');
        if ($youtube->count()) {
            // The ID ends at the first "?", "&", "/" or whitespace, so embed
            // URLs without a query string are matched too.
            $src = (string) $youtube->attr('src');
            if (preg_match('~youtube\.com/embed/([^?&/\s]+)~', $src, $matches) === 1) {
                $metadata->image = 'https://img.youtube.com/vi/' . $matches[1] . '/maxresdefault.jpg';
            }
        }

        // Image post: use the first linked image. Counting the anchors (not just
        // the container) avoids calling attr() on an empty node list.
        $imageLinks = $crawler->filter('.field-name-field-images a');
        if ($imageLinks->count()) {
            $href = $this->normalizeImageUrl($imageLinks->first()->attr('href'));
            if ($href !== null) {
                $metadata->image = $href;
            }
        }

        return $metadata;
    }

    /**
     * Turn a scraped image reference into a usable URL.
     *
     * Protocol-relative references ("//host/path") get an "https:" prefix;
     * any other non-empty value is returned unchanged.
     *
     * @param string|null $url Raw attribute value, or null when the attribute is absent.
     * @return string|null Normalized URL, or null when there is nothing to use.
     */
    private function normalizeImageUrl($url)
    {
        if ($url === null || $url === '') {
            return null;
        }

        return strpos($url, '//') === 0 ? 'https:' . $url : $url;
    }
}