Merge branch 'develop' into feature/resolver-dlsite-affiliate
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
namespace App\MetadataResolver;
|
||||
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class IwaraResolver implements Resolver
|
||||
{
|
||||
@@ -19,46 +20,40 @@ class IwaraResolver implements Resolver
|
||||
public function resolve(string $url): Metadata
|
||||
{
|
||||
$res = $this->client->get($url);
|
||||
|
||||
if ($res->getStatusCode() === 200) {
|
||||
$dom = new \DOMDocument();
|
||||
@$dom->loadHTML(mb_convert_encoding($res->getBody(), 'HTML-ENTITIES', 'UTF-8'));
|
||||
$xpath = new \DOMXPath($dom);
|
||||
|
||||
$metadata = new Metadata();
|
||||
$html = (string) $res->getBody();
|
||||
$crawler = new Crawler($html);
|
||||
|
||||
// find title
|
||||
foreach ($xpath->query('//title') as $node) {
|
||||
$content = $node->textContent;
|
||||
if (!empty($content)) {
|
||||
$metadata->title = $content;
|
||||
break;
|
||||
$infoElements = $crawler->filter('#video-player + div, .field-name-field-video-url + div, .field-name-field-images + div');
|
||||
$title = $infoElements->filter('h1.title')->text();
|
||||
$author = $infoElements->filter('.username')->text();
|
||||
$description = $infoElements->filter('.field-type-text-with-summary')->text('');
|
||||
$tags = $infoElements->filter('a[href^="/videos"], a[href^="/images"]')->extract('_text');
|
||||
// 役に立たないタグを削除する
|
||||
$tags = array_values(array_diff($tags, ['Uncategorized', 'Other']));
|
||||
array_push($tags, $author);
|
||||
|
||||
$metadata->title = $title;
|
||||
$metadata->description = '投稿者: ' . $author . PHP_EOL . $description;
|
||||
$metadata->tags = $tags;
|
||||
|
||||
// iwara video
|
||||
if ($crawler->filter('#video-player')->count()) {
|
||||
$metadata->image = 'https:' . $crawler->filter('#video-player')->attr('poster');
|
||||
}
|
||||
|
||||
// youtube
|
||||
if ($crawler->filter('iframe[src^="//www.youtube.com"]')->count()) {
|
||||
if (preg_match('~youtube\.com/embed/(\S+)\?~', $crawler->filter('iframe[src^="//www.youtube.com"]')->attr('src'), $matches) === 1) {
|
||||
$youtubeId = $matches[1];
|
||||
$metadata->image = 'https://img.youtube.com/vi/' . $youtubeId . '/maxresdefault.jpg';
|
||||
}
|
||||
}
|
||||
|
||||
// find thumbnail
|
||||
foreach ($xpath->query('//*[@id="video-player"]') as $node) {
|
||||
$poster = $node->getAttribute('poster');
|
||||
if (!empty($poster)) {
|
||||
if (strpos($poster, '//') === 0) {
|
||||
$poster = 'https:' . $poster;
|
||||
}
|
||||
$metadata->image = $poster;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (empty($metadata->image)) {
|
||||
// YouTube embedded?
|
||||
foreach ($xpath->query('//div[@class="embedded-video"]//iframe') as $node) {
|
||||
$src = $node->getAttribute('src');
|
||||
if (preg_match('~youtube\.com/embed/(\S+)\?~', $src, $matches) !== -1) {
|
||||
$youtubeId = $matches[1];
|
||||
$iwaraThumbUrl = 'https://i.iwara.tv/sites/default/files/styles/thumbnail/public/video_embed_field_thumbnails/youtube/' . $youtubeId . '.jpg';
|
||||
|
||||
$metadata->image = $iwaraThumbUrl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// images
|
||||
if ($crawler->filter('.field-name-field-images')->count()) {
|
||||
$metadata->image = 'https:' . $crawler->filter('.field-name-field-images a')->first()->attr('href');
|
||||
}
|
||||
|
||||
return $metadata;
|
||||
|
@@ -15,9 +15,9 @@ class MetadataResolver implements Resolver
|
||||
'~www\.melonbooks\.co\.jp/detail/detail\.php~' => MelonbooksResolver::class,
|
||||
'~ec\.toranoana\.(jp|shop)/(tora|joshi)(_[rd]+)?/(ec|digi)/item/~' => ToranoanaResolver::class,
|
||||
'~iwara\.tv/videos/.*~' => IwaraResolver::class,
|
||||
'~www\.dlsite\.com/.+/(work|announce)/=/product_id/..\d+(\.html)?~' => DLsiteResolver::class,
|
||||
'~www\.dlsite\.com/.+/dlaf/=/link/(work|announce)/aid/.+/..\d+(\.html)?~' => DLsiteResolver::class,
|
||||
'~www\.dlsite\.com/.+/dlaf/=/aid/.+/url/.+~' => DLsiteResolver::class,
|
||||
'~www\.dlsite\.com/.*/(work|announce)/=/product_id/..\d+(\.html)?~' => DLsiteResolver::class,
|
||||
'~www\.dlsite\.com/.*/dlaf/=/link/(work|announce)/aid/.+/..\d+(\.html)?~' => DLsiteResolver::class,
|
||||
'~www\.dlsite\.com/.*/dlaf/=/aid/.+/url/.+~' => DLsiteResolver::class,
|
||||
'~dlsite\.jp/...tw/..\d+~' => DLsiteResolver::class,
|
||||
'~www\.pixiv\.net/member_illust\.php\?illust_id=\d+~' => PixivResolver::class,
|
||||
'~www\.pixiv\.net/user/\d+/series/\d+~' => PixivResolver::class,
|
||||
|
@@ -3,6 +3,7 @@
|
||||
namespace App\MetadataResolver;
|
||||
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class NijieResolver implements Resolver
|
||||
{
|
||||
@@ -30,23 +31,29 @@ class NijieResolver implements Resolver
|
||||
$url = preg_replace('~view_popup\.php~', 'view.php', $url);
|
||||
}
|
||||
|
||||
$client = $this->client;
|
||||
$res = $client->get($url);
|
||||
$res = $this->client->get($url);
|
||||
if ($res->getStatusCode() === 200) {
|
||||
$metadata = $this->ogpResolver->parse($res->getBody());
|
||||
$html = (string) $res->getBody();
|
||||
$metadata = $this->ogpResolver->parse($html);
|
||||
$crawler = new Crawler($html);
|
||||
|
||||
$dom = new \DOMDocument();
|
||||
@$dom->loadHTML(mb_convert_encoding($res->getBody(), 'HTML-ENTITIES', 'UTF-8'));
|
||||
$xpath = new \DOMXPath($dom);
|
||||
$dataNode = $xpath->query('//script[substring(@type, string-length(@type) - 3, 4) = "json"]');
|
||||
foreach ($dataNode as $node) {
|
||||
// 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
|
||||
$imageData = json_decode(preg_replace('/\r?\n/', '\n', $node->nodeValue), true);
|
||||
if (isset($imageData['thumbnailUrl']) && !ends_with($imageData['thumbnailUrl'], '.gif') && !ends_with($imageData['thumbnailUrl'], '.mp4')) {
|
||||
$metadata->image = preg_replace('~nijie\\.info/.*/nijie_picture/~', 'nijie.info/nijie_picture/', $imageData['thumbnailUrl']);
|
||||
break;
|
||||
}
|
||||
// DomCrawler内でjson内の日本語がHTMLエンティティに変換されるのでhtml_entity_decode
|
||||
$json = html_entity_decode($crawler->filter('script[type="application/ld+json"]')->first()->text());
|
||||
|
||||
// 改行がそのまま入っていることがあるのでデコード前にエスケープが必要
|
||||
$data = json_decode(preg_replace('/\r?\n/', '\n', $json), true);
|
||||
|
||||
$metadata->title = $data['name'];
|
||||
$metadata->description = '投稿者: ' . $data['author']['name'] . PHP_EOL . $data['description'];
|
||||
if (
|
||||
isset($data['thumbnailUrl']) &&
|
||||
!ends_with($data['thumbnailUrl'], '.gif') &&
|
||||
!ends_with($data['thumbnailUrl'], '.mp4')
|
||||
) {
|
||||
// サムネイルからメイン画像に
|
||||
$metadata->image = str_replace('__rs_l160x160/', '', $data['thumbnailUrl']);
|
||||
}
|
||||
$metadata->tags = $crawler->filter('#view-tag span.tag_name')->extract('_text');
|
||||
|
||||
return $metadata;
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user