diff --git a/app/MetadataResolver/ActivityPubResolver.php b/app/MetadataResolver/ActivityPubResolver.php
new file mode 100644
index 0000000..981cf9d
--- /dev/null
+++ b/app/MetadataResolver/ActivityPubResolver.php
@@ -0,0 +1,80 @@
+activityClient = new \GuzzleHttp\Client([
+ 'headers' => [
+ 'Accept' => 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"'
+ ]
+ ]);
+ }
+
+    /**
+     * Fetches $url through the ActivityPub-aware HTTP client and converts the response body
+     * into Metadata via parse().
+     *
+     * Exceptions raised by the HTTP client itself are not caught here and propagate to the caller.
+     *
+     * @param string $url URL of the activity or object document to fetch.
+     * @return Metadata Metadata built by parse() from the response body.
+     * @throws \RuntimeException When the response status is anything other than 200; the message
+     *                           is "<status>: <url>".
+     */
+    public function resolve(string $url): Metadata
+    {
+        $res = $this->activityClient->get($url);
+        $status = $res->getStatusCode();
+        if ($status !== 200) {
+            throw new \RuntimeException("{$status}: $url");
+        }
+
+        // getBody() yields a stream object, not a string. Cast explicitly instead of relying on
+        // implicit coercion at the parse(string) boundary, which fails under strict_types.
+        return $this->parse((string) $res->getBody());
+    }
+
+    /**
+     * Builds Metadata from an ActivityStreams JSON document.
+     *
+     * If the decoded document has an "object" member, that member is used; otherwise the document
+     * itself is treated as the object. The fields are then mapped as follows:
+     *  - title:       display title of the actor referenced by "attributedTo" (fetched remotely)
+     *  - description: "summary" followed by " | " when present, then "content" as plain text
+     *  - image:       "url" of the first "attachment" entry
+     *
+     * Undecodable JSON or absent fields produce empty values rather than an error.
+     *
+     * @param string $json Raw JSON text of an activity or object.
+     * @return Metadata Populated metadata; fields that cannot be derived are left empty.
+     */
+    public function parse(string $json): Metadata
+    {
+        $activityOrObject = json_decode($json, true);
+        $object = $activityOrObject['object'] ?? $activityOrObject;
+
+        $metadata = new Metadata();
+
+        // The document is remote, untrusted input. "attributedTo" may be an embedded object or a
+        // list instead of a plain IRI; only a string can be handed to getTitleFromActor(string).
+        $attributedTo = $object['attributedTo'] ?? null;
+        $metadata->title = is_string($attributedTo) ? $this->getTitleFromActor($attributedTo) : '';
+
+        // ".=" appends to whatever initial value Metadata::$description has (declared outside
+        // this view). Non-string values are skipped instead of being concatenated.
+        $summary = $object['summary'] ?? null;
+        $metadata->description .= is_string($summary) ? $summary . " | " : '';
+
+        $content = $object['content'] ?? null;
+        $metadata->description .= is_string($content) ? $this->html2text($content) : '';
+
+        $metadata->image = $object['attachment'][0]['url'] ?? '';
+
+        return $metadata;
+    }
+
+    /**
+     * Fetches an actor document and formats a display title of the form "name (@user@host)".
+     *
+     * This is best effort: a non-200 response or a TransferException is logged at info level
+     * and yields an empty string instead of an error.
+     *
+     * @param string $url URL of the actor document.
+     * @return string The actor's "name", followed by " (@preferredUsername@host)" when a
+     *                preferredUsername is present; '' when the actor could not be fetched.
+     */
+    private function getTitleFromActor(string $url): string
+    {
+        try {
+            $res = $this->activityClient->get($url);
+            if ($res->getStatusCode() !== 200) {
+                Log::info(self::class . ': Actorの取得に失敗 URL=' . $url);
+
+                return '';
+            }
+
+            // Cast the body stream explicitly; json_decode() expects a string.
+            $actor = json_decode((string) $res->getBody(), true);
+
+            // The actor document is remote input: use only string fields.
+            $name = $actor['name'] ?? '';
+            $title = is_string($name) ? $name : '';
+
+            $username = $actor['preferredUsername'] ?? null;
+            if (is_string($username)) {
+                // The host comes from the actor's own id. If the document has no usable id,
+                // fall back to the URL it was fetched from rather than reading an undefined index.
+                $id = $actor['id'] ?? null;
+                $host = parse_url(is_string($id) ? $id : $url, PHP_URL_HOST);
+                $title .= ' (@' . $username . '@' . $host . ')';
+            }
+
+            return $title;
+        } catch (TransferException $e) {
+            Log::info(self::class . ': Actorの取得に失敗 URL=' . $url);
+
+            return '';
+        }
+    }
+
+    /**
+     * Converts an HTML fragment to plain text, turning line breaks and paragraph boundaries
+     * into newlines before the remaining markup is stripped.
+     *
+     * @param string $html HTML fragment, UTF-8 encoded.
+     * @return string Text content of the fragment; '' for empty input.
+     */
+    private function html2text(string $html): string
+    {
+        // DOMDocument::loadHTML() rejects an empty source, so there is nothing to parse here.
+        if ($html === '') {
+            return '';
+        }
+
+        // loadHTML() does not assume UTF-8 for input without a charset declaration, so
+        // non-ASCII characters are turned into entities first.
+        $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
+        // NOTE(review): the pattern literal was mangled in this copy of the patch (its HTML-like
+        // parts were stripped). This form maps <br> tags and </p><p> boundaries to newlines;
+        // confirm against the upstream commit.
+        $html = preg_replace('~<br\s*/?\s*>|</p>\s*<p[^>]*>~i', "\n", $html);
+        $dom = new \DOMDocument();
+        $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
+
+        return $dom->textContent;
+    }
+}
diff --git a/app/MetadataResolver/MetadataResolver.php b/app/MetadataResolver/MetadataResolver.php
index 94f2156..a00625b 100644
--- a/app/MetadataResolver/MetadataResolver.php
+++ b/app/MetadataResolver/MetadataResolver.php
@@ -2,6 +2,9 @@
 
 namespace App\MetadataResolver;
 
+use GuzzleHttp\Exception\ClientException;
+use GuzzleHttp\Exception\ServerException;
+
 class MetadataResolver implements Resolver
 {
     public $rules = [
@@ -18,9 +21,17 @@ class MetadataResolver implements Resolver
         '~www\.patreon\.com/~' => PatreonResolver::class,
         '~www\.deviantart\.com/.*/art/.*~' => DeviantArtResolver::class,
         '~\.syosetu\.com/n\d+[a-z]{2,}~' => NarouResolver::class,
-        '/.*/' => OGPResolver::class
     ];
 
+    public $mimeTypes = [
+        'application/activity+json' => ActivityPubResolver::class,
+        'application/ld+json' => ActivityPubResolver::class,
+        'text/html' => OGPResolver::class,
+        '*/*' => OGPResolver::class
+    ];
+
+    public $defaultResolver = OGPResolver::class;
+
     public function resolve(string $url): Metadata
     {
         foreach ($this->rules as $pattern => $class) {
@@ -31,6 +42,72 @@ class MetadataResolver implements Resolver
             }
         }
 
+        $result = $this->resolveWithAcceptHeader($url);
+        if ($result !== null) {
+            return $result;
+        }
+
+        if (isset($this->defaultResolver)) {
+            $resolver = new $this->defaultResolver();
+            return $resolver->resolve($url);
+        }
+
         throw new \UnexpectedValueException('URL not matched.');
     }
+
+    /**
+     * Fetches $url advertising every MIME type registered in $mimeTypes and parses the
+     * response with the class registered for the returned Content-Type.
+     *
+     * @param string $url URL to fetch.
+     * @return Metadata|null Parsed metadata, or null when the caller should fall back: non-200
+     *                       status, no registered parser, a 406 response, or a ServerException.
+     * @throws ClientException Re-thrown when the client error is anything other than 406.
+     */
+    public function resolveWithAcceptHeader(string $url): ?Metadata
+    {
+        try {
+            // Rails and similar frameworks treat an Accept header containing */* as a sloppy
+            // browser-style header and ignore the header altogether.
+            // c.f. https://github.com/rails/rails/issues/9940
+            // So */* is treated here only as the handler for responses that ignored our Accept
+            // header (which is common); it is never added to the Accept header we send.
+            $acceptTypes = array_diff(array_keys($this->mimeTypes), ['*/*']);
+
+            $client = new \GuzzleHttp\Client();
+            $res = $client->request('GET', $url, [
+                'headers' => [
+                    'Accept' => implode(', ', $acceptTypes)
+                ]
+            ]);
+
+            if ($res->getStatusCode() !== 200) {
+                // code < 400 && code !== 200 => fallback
+                return null;
+            }
+
+            // A missing or empty Content-Type leaves $matches empty; fall through to the */*
+            // handler instead of reading an undefined offset. Media types are
+            // case-insensitive, hence the strtolower().
+            preg_match('/^[^;\s]+/', $res->getHeaderLine('Content-Type'), $matches);
+            $mimeType = strtolower($matches[0] ?? '');
+
+            $class = $this->mimeTypes[$mimeType] ?? $this->mimeTypes['*/*'] ?? null;
+            if ($class !== null) {
+                $parser = new $class();
+
+                // getBody() is a stream object; cast explicitly before handing it to parse().
+                return $parser->parse((string) $res->getBody());
+            }
+        } catch (ClientException $e) {
+            // 406 Not Acceptable is most likely caused by our Accept header: ignore and fall back.
+            if ($e->getResponse()->getStatusCode() !== 406) {
+                throw $e;
+            }
+        } catch (ServerException $e) {
+            // 5xx might be caused by the unusual Accept header (?): ignore and fall back.
+        }
+
+        return null;
+    }
 }
diff --git a/app/MetadataResolver/OGPResolver.php b/app/MetadataResolver/OGPResolver.php
index 5afe83c..3002d91 100644
--- a/app/MetadataResolver/OGPResolver.php
+++ b/app/MetadataResolver/OGPResolver.php
@@ -2,7 +2,7 @@
 
 namespace App\MetadataResolver;
 
-class OGPResolver implements Resolver
+class OGPResolver implements Resolver, Parser
 {
     public function resolve(string $url): Metadata
     {
diff --git a/app/MetadataResolver/Parser.php b/app/MetadataResolver/Parser.php
new file mode 100644
index 0000000..f9effde
--- /dev/null
+++ b/app/MetadataResolver/Parser.php
@@ -0,0 +1,8 @@
+