2018-04-15 02:05:41 +09:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace App\MetadataResolver;
|
|
|
|
|
|
2019-02-23 16:56:13 +09:00
|
|
|
|
use GuzzleHttp\Client;
|
2019-02-11 01:43:07 +09:00
|
|
|
|
use GuzzleHttp\Exception\ClientException;
|
|
|
|
|
use GuzzleHttp\Exception\ServerException;
|
|
|
|
|
|
2018-04-15 02:05:41 +09:00
|
|
|
|
/**
 * Top-level resolver that dispatches a URL to a concrete resolver.
 *
 * Resolution order, as implemented by {@see MetadataResolver::resolve()}:
 *  1. the first entry of {@see $rules} whose pattern matches the URL;
 *  2. content negotiation via {@see MetadataResolver::resolveWithAcceptHeader()};
 *  3. {@see $defaultResolver}, when set.
 */
class MetadataResolver implements Resolver
{
    /**
     * Map of PCRE pattern (delimited by `~`) to resolver class name.
     *
     * Patterns are tested in declaration order with preg_match() against the
     * raw URL string; the first match wins.
     *
     * @var array<string, string>
     */
    public $rules = [
        '~(((sp\.)?seiga\.nicovideo\.jp/seiga(/#!)?|nico\.ms))/im~' => NicoSeigaResolver::class,
        '~nijie\.info/view(_popup)?\.php~' => NijieResolver::class,
        '~komiflo\.com(/#!)?/comics/(\\d+)~' => KomifloResolver::class,
        '~www\.melonbooks\.co\.jp/detail/detail\.php~' => MelonbooksResolver::class,
        '~ec\.toranoana\.(jp|shop)/(tora|joshi)(_[rd]+)?/(ec|digi)/item/~' => ToranoanaResolver::class,
        '~iwara\.tv/(videos|images)/.*~' => IwaraResolver::class,
        '~www\.dlsite\.com/.*/(work|announce)/=/product_id/..\d+(\.html)?~' => DLsiteResolver::class,
        '~www\.dlsite\.com/.*/dlaf/=/link/(work|announce)/aid/.+/..\d+(\.html)?~' => DLsiteResolver::class,
        '~www\.dlsite\.com/.*/dlaf/=/aid/.+/url/.+~' => DLsiteResolver::class,
        '~dlsite\.jp/...tw/..\d+~' => DLsiteResolver::class,
        '~www\.pixiv\.net/member_illust\.php\?illust_id=\d+~' => PixivResolver::class,
        '~www\.pixiv\.net/artworks/\d+~' => PixivResolver::class,
        '~www\.pixiv\.net/user/\d+/series/\d+~' => PixivResolver::class,
        '~fantia\.jp/posts/\d+~' => FantiaResolver::class,
        '~dmm\.co\.jp/~' => FanzaResolver::class,
        '~www\.patreon\.com/~' => PatreonResolver::class,
        '~www\.deviantart\.com/.*/art/.*~' => DeviantArtResolver::class,
        '~\.syosetu\.com/n\d+[a-z]{2,}~' => NarouResolver::class,
        '~ci-en\.jp/creator/\d+/article/\d+~' => CienResolver::class,
        '~www\.plurk\.com\/p\/.*~' => PlurkResolver::class,
        '~(adult\.)?contents\.fc2\.com\/article_search\.php\?id=\d+~' => FC2ContentsResolver::class,
        '~store\.steampowered\.com/app/\d+~' => SteamResolver::class,
        '~www\.xtube\.com/video-watch/.*-\d+$~' => XtubeResolver::class,
        '~ss\.kb10uy\.org/posts/\d+$~' => Kb10uyShortStoryServerResolver::class,
        '~(..|www)\.pornhub\.com/view_video\.php\?viewkey=.+$~' => PornHubResolver::class,
    ];

    /**
     * Map of response media type to the class used to parse the response body.
     *
     * Every key except the wildcard is sent in the Accept header. The wildcard
     * entry is never sent; it is the handler for responses whose Content-Type
     * matches no other key.
     *
     * @var array<string, string>
     */
    public $mimeTypes = [
        'application/activity+json' => ActivityPubResolver::class,
        'application/ld+json' => ActivityPubResolver::class,
        'text/html' => OGPResolver::class,
        '*/*' => OGPResolver::class,
    ];

    /**
     * Resolver class used when neither a rule nor content negotiation produced a result.
     *
     * @var string|null
     */
    public $defaultResolver = OGPResolver::class;

    /**
     * Resolve metadata for the given URL.
     *
     * @param string $url URL to resolve.
     *
     * @return Metadata
     *
     * @throws \UnexpectedValueException When no rule matched, content negotiation
     *                                   yielded nothing and no default resolver is set.
     */
    public function resolve(string $url): Metadata
    {
        foreach ($this->rules as $pattern => $class) {
            if (preg_match($pattern, $url) === 1) {
                /** @var Resolver $resolver */
                $resolver = app($class);

                return $resolver->resolve($url);
            }
        }

        $result = $this->resolveWithAcceptHeader($url);
        if ($result !== null) {
            return $result;
        }

        if (isset($this->defaultResolver)) {
            /** @var Resolver $resolver */
            $resolver = app($this->defaultResolver);

            return $resolver->resolve($url);
        }

        throw new \UnexpectedValueException('URL not matched.');
    }

    /**
     * Fetch the URL with an Accept header built from {@see $mimeTypes} and parse
     * the body with the class registered for the returned Content-Type.
     *
     * @param string $url URL to fetch.
     *
     * @return Metadata|null Parsed metadata, or null when the caller should fall
     *                       back (non-200 status, 406, 5xx, or no parser registered).
     *
     * @throws ClientException For 4xx responses other than 406.
     */
    public function resolveWithAcceptHeader(string $url): ?Metadata
    {
        try {
            // Rails and similar frameworks treat an Accept header containing */* as a
            // sloppy browser header and ignore the whole header.
            // c.f. https://github.com/rails/rails/issues/9940
            // So */* is not added to the Accept header; instead it serves as the handler
            // for responses that ignored our Accept header (which is common).
            $acceptTypes = array_diff(array_keys($this->mimeTypes), ['*/*']);

            $client = app(Client::class);
            $res = $client->request('GET', $url, [
                'headers' => [
                    'Accept' => implode(', ', $acceptTypes),
                ],
            ]);

            // code < 400 && code !== 200 => fallback
            if ($res->getStatusCode() !== 200) {
                return null;
            }

            $class = $this->findParserClass($res->getHeaderLine('Content-Type'));
            if ($class !== null) {
                $parser = app($class);

                return $parser->parse($res->getBody());
            }
        } catch (ClientException $e) {
            // 406 Not Acceptable is most likely caused by our Accept header,
            // so swallow it and let the caller fall back.
            if ($e->getResponse()->getStatusCode() !== 406) {
                throw $e;
            }
        } catch (ServerException $e) {
            // A 5xx might be caused by the unusual Accept header (?),
            // so swallow it and let the caller fall back.
        }

        return null;
    }

    /**
     * Pick the parser class registered for a Content-Type header value.
     *
     * The media type is the leading run of characters up to the first `;` or
     * whitespace. An exact key match is tried first, then a lowercase match
     * (media types are case-insensitive), then the wildcard entry. A missing or
     * empty header goes straight to the wildcard entry instead of reading an
     * undefined match offset.
     *
     * @param string $contentType Raw Content-Type header line; may be empty.
     *
     * @return string|null Registered class name, or null when nothing applies.
     */
    private function findParserClass(string $contentType): ?string
    {
        if (preg_match('/^[^;\s]+/', $contentType, $matches) === 1) {
            $mimeType = $matches[0];

            if (isset($this->mimeTypes[$mimeType])) {
                return $this->mimeTypes[$mimeType];
            }

            $normalized = strtolower($mimeType);
            if (isset($this->mimeTypes[$normalized])) {
                return $this->mimeTypes[$normalized];
            }
        }

        return $this->mimeTypes['*/*'] ?? null;
    }
}
|