tissue/app/MetadataResolver/MetadataResolver.php

114 lines
4.4 KiB
PHP
Raw Normal View History

<?php
namespace App\MetadataResolver;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ServerException;
/**
 * Entry-point resolver that picks a concrete resolver for a URL.
 *
 * Resolution order (see resolve()):
 *  1. the first URL pattern in $rules that matches,
 *  2. content negotiation via resolveWithAcceptHeader(),
 *  3. $defaultResolver.
 */
class MetadataResolver implements Resolver
{
    /**
     * PCRE pattern => resolver class name.
     *
     * Patterns are tried in declaration order and the first match wins.
     *
     * @var array<string, string>
     */
    public $rules = [
        // Note: the trailing "/im" is inside the "~" delimiters, so it is matched
        // literally as part of the URL path; it is NOT a set of PCRE modifiers.
        '~(((sp\.)?seiga\.nicovideo\.jp/seiga(/#!)?|nico\.ms))/im~' => NicoSeigaResolver::class,
        '~nijie\.info/view(_popup)?\.php~' => NijieResolver::class,
        '~komiflo\.com(/#!)?/comics/(\\d+)~' => KomifloResolver::class,
        '~www\.melonbooks\.co\.jp/detail/detail\.php~' => MelonbooksResolver::class,
        '~ec\.toranoana\.jp/tora_r/ec/item/.*~' => ToranoanaResolver::class,
        '~iwara\.tv/videos/.*~' => IwaraResolver::class,
        '~www\.dlsite\.com/.*/work/=/product_id/..\d+\.html~' => DLsiteResolver::class,
        '~dlsite\.jp/mawtw/..\d+~' => DLsiteResolver::class,
        '~www\.pixiv\.net/member_illust\.php\?illust_id=\d+~' => PixivResolver::class,
        '~fantia\.jp/posts/\d+~' => FantiaResolver::class,
        '~dmm\.co\.jp/~' => FanzaResolver::class,
        '~www\.patreon\.com/~' => PatreonResolver::class,
        '~www\.deviantart\.com/.*/art/.*~' => DeviantArtResolver::class,
        '~\.syosetu\.com/n\d+[a-z]{2,}~' => NarouResolver::class,
        '~ci-en\.jp/creator/\d+/article/\d+~' => CienResolver::class,
        '~www\.plurk\.com\/p\/.*~' => PlurkResolver::class,
        '~(adult\.)?contents\.fc2\.com\/article_search\.php\?id=\d+~' => FC2ContentsResolver::class,
    ];

    /**
     * Response media type => parser class name.
     *
     * Keys are expected to be lowercase because the response media type is
     * lowercased before lookup. The wildcard key is never sent in the Accept
     * header; it is only the handler for responses whose media type has no
     * dedicated entry (see resolveWithAcceptHeader()).
     *
     * @var array<string, string>
     */
    public $mimeTypes = [
        'application/activity+json' => ActivityPubResolver::class,
        'application/ld+json' => ActivityPubResolver::class,
        'text/html' => OGPResolver::class,
        '*/*' => OGPResolver::class
    ];

    /**
     * Resolver class used when neither a rule nor content negotiation produced a result.
     *
     * @var string|null
     */
    public $defaultResolver = OGPResolver::class;

    /**
     * Resolve metadata for the given URL.
     *
     * @param string $url URL to resolve.
     * @return Metadata
     * @throws \UnexpectedValueException when nothing matched and no default resolver is set.
     */
    public function resolve(string $url): Metadata
    {
        // 1. Site-specific resolvers, selected by URL pattern.
        foreach ($this->rules as $pattern => $class) {
            if (preg_match($pattern, $url) === 1) {
                /** @var Resolver $resolver */
                $resolver = app($class);

                return $resolver->resolve($url);
            }
        }

        // 2. Generic resolution based on the media type the server answers with.
        $result = $this->resolveWithAcceptHeader($url);
        if ($result !== null) {
            return $result;
        }

        // 3. Last resort.
        if (isset($this->defaultResolver)) {
            /** @var Resolver $resolver */
            $resolver = app($this->defaultResolver);

            return $resolver->resolve($url);
        }

        throw new \UnexpectedValueException('URL not matched.');
    }

    /**
     * Fetch the URL with an Accept header built from $mimeTypes and hand the
     * response body to the parser registered for the returned media type.
     *
     * Returns null (so the caller can fall back) when:
     *  - the request succeeds with a status other than 200,
     *  - the server answers 406 Not Acceptable,
     *  - the server answers with a 5xx error,
     *  - no parser is registered for the media type and there is no wildcard entry.
     *
     * @param string $url URL to fetch.
     * @return Metadata|null
     * @throws ClientException for 4xx responses other than 406.
     */
    public function resolveWithAcceptHeader(string $url): ?Metadata
    {
        try {
            // Rails and similar frameworks treat an Accept header containing */* as a
            // sloppy browser-style header and ignore it entirely.
            // c.f. https://github.com/rails/rails/issues/9940
            // So */* is used here only as the handler for responses that disregarded
            // our Accept header, and is deliberately left out of the header itself.
            $acceptTypes = array_diff(array_keys($this->mimeTypes), ['*/*']);

            $client = app(Client::class);
            $res = $client->request('GET', $url, [
                'headers' => [
                    'Accept' => implode(', ', $acceptTypes)
                ]
            ]);

            // code < 400 && code !== 200 => fallback
            if ($res->getStatusCode() !== 200) {
                return null;
            }

            // Strip parameters such as "; charset=utf-8". The header may be absent or
            // empty, in which case nothing matches and the wildcard handler is used.
            // Media types are case-insensitive, so normalise before the lookup.
            $mimeType = '';
            if (preg_match('/^[^;\s]+/', $res->getHeaderLine('Content-Type'), $matches) === 1) {
                $mimeType = strtolower($matches[0]);
            }

            $class = $this->mimeTypes[$mimeType] ?? $this->mimeTypes['*/*'] ?? null;
            if ($class === null) {
                return null;
            }

            $parser = app($class);

            return $parser->parse($res->getBody());
        } catch (ClientException $e) {
            // 406 Not Acceptable is most likely caused by our Accept header,
            // so swallow it and let the caller fall back. Anything else is re-thrown.
            if ($e->getResponse()->getStatusCode() !== 406) {
                throw $e;
            }
        } catch (ServerException $e) {
            // A 5xx may also be triggered by the unusual Accept header,
            // so swallow it and let the caller fall back.
        }

        return null;
    }
}