tissue/app/MetadataResolver/DLsiteResolver.php

62 lines
2.2 KiB
PHP
Raw Normal View History

2019-01-14 04:22:40 +09:00
<?php
namespace App\MetadataResolver;
use GuzzleHttp\Client;
2019-01-14 04:22:40 +09:00
/**
 * Metadata resolver for DLsite product pages.
 *
 * Fetches the page, lets OGPResolver build the base metadata, then strips the
 * DLsite boilerplate from the title/description, prepends a "<heading>: <maker>"
 * line to the description when the maker can be located in the page, and
 * rewrites the sample image file name to the main image file name.
 */
class DLsiteResolver implements Resolver
{
    /**
     * @var Client
     */
    private $client;

    /**
     * @var OGPResolver
     */
    private $ogpResolver;

    public function __construct(Client $client, OGPResolver $ogpResolver)
    {
        $this->client = $client;
        $this->ogpResolver = $ogpResolver;
    }

    /**
     * Resolve metadata for the given DLsite URL.
     *
     * @param string $url Product page URL; "-touch" (mobile) URLs are accepted.
     * @return Metadata
     * @throws \RuntimeException When the response status code is not 200.
     */
    public function resolve(string $url): Metadata
    {
        // Normalise mobile ("-touch") page URLs to the desktop page.
        // str_replace is a no-op when the marker is absent, so no guard is needed.
        $url = str_replace('-touch', '', $url);

        $res = $this->client->get($url);
        if ($res->getStatusCode() !== 200) {
            throw new \RuntimeException("{$res->getStatusCode()}: $url");
        }

        // Read the body once; it is needed both by the OGP parser and the DOM parser.
        $html = (string) $res->getBody();
        $metadata = $this->ogpResolver->parse($html);

        $dom = new \DOMDocument();
        // Warnings are suppressed because real-world pages are rarely valid HTML.
        @$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        $xpath = new \DOMXPath($dom);

        // Extract the maker name and the table heading that labels it.
        $maker = $this->extractMaker($xpath);
        $makerHead = $maker === null ? null : $this->findMakerHeading($xpath, $maker);

        // Strip the boilerplate suffixes from title and description.
        $metadata->title = trim(preg_replace('~ \[.+\] \| DLsite(がるまに)?$~', '', $metadata->title));
        $metadata->description = trim(preg_replace('~「DLsite.+」は同人誌・同人ゲーム・同人音声のダウンロードショップ。お気に入りの作品をすぐダウンロードできてすぐ楽しめる毎日更新しているのであなたが探している作品にきっと出会えます。国内最大級の二次元総合ダウンロードショップ「DLsite」$~', '', $metadata->description));

        // Format: put the maker line first, but only when both parts were found;
        // otherwise fall back to the cleaned OGP description instead of failing.
        if ($maker !== null && $makerHead !== null) {
            $metadata->description = $makerHead . ': ' . $maker . PHP_EOL . $metadata->description;
        }
        $metadata->image = str_replace('img_sam.jpg', 'img_main.jpg', $metadata->image);

        return $metadata;
    }

    /**
     * Pull the maker name out of the bracketed part of the page <title>.
     *
     * @param \DOMXPath $xpath
     * @return string|null The maker name, or null when there is no <title> or
     *                     it does not match the "[maker] | DLsite..." pattern.
     */
    private function extractMaker(\DOMXPath $xpath)
    {
        $titleNode = $xpath->query('//title')->item(0);
        if ($titleNode === null) {
            return null;
        }

        if (preg_match('~\[(.+)\] \| DLsite.+$~', $titleNode->textContent, $match) !== 1) {
            return null;
        }

        return $match[1];
    }

    /**
     * Find the text of the <th> in the table row that contains a link whose
     * text includes the maker name.
     *
     * @param \DOMXPath $xpath
     * @param string $maker
     * @return string|null The heading text, or null when no such row exists.
     */
    private function findMakerHeading(\DOMXPath $xpath, string $maker)
    {
        $expression = '//a[contains(text(), ' . $this->toXPathLiteral($maker) . ')]/ancestor::tr/th';
        $nodes = $xpath->query($expression);
        if ($nodes === false) {
            return null;
        }

        $heading = $nodes->item(0);

        return $heading === null ? null : $heading->textContent;
    }

    /**
     * Turn an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape character inside literals, so a value containing
     * both quote kinds is assembled with concat().
     *
     * @param string $value
     * @return string
     */
    private function toXPathLiteral(string $value): string
    {
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }

        // Split on double quotes and re-join them as single-quoted '"' pieces.
        return 'concat("' . implode('", \'"\', "', explode('"', $value)) . '")';
    }
}