From 5750eeb3a5b31c0177c1a3f65f9ba15b49bfd24e Mon Sep 17 00:00:00 2001 From: unarist Date: Sat, 9 Feb 2019 04:04:41 +0900 Subject: [PATCH] =?UTF-8?q?ActivityPubResolver=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/MetadataResolver/ActivityPubResolver.php | 71 ++++++++++++++++++++ app/MetadataResolver/MetadataResolver.php | 30 ++++++++- app/MetadataResolver/OGPResolver.php | 2 +- app/MetadataResolver/Parser.php | 8 +++ 4 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 app/MetadataResolver/ActivityPubResolver.php create mode 100644 app/MetadataResolver/Parser.php diff --git a/app/MetadataResolver/ActivityPubResolver.php b/app/MetadataResolver/ActivityPubResolver.php new file mode 100644 index 0000000..f790f02 --- /dev/null +++ b/app/MetadataResolver/ActivityPubResolver.php @@ -0,0 +1,71 @@ +activityClient = new \GuzzleHttp\Client([ + 'headers' => [ + 'Accept' => 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"' + ] + ]); + } + + public function resolve(string $url): Metadata + { + $res = $this->activityClient->get($url); + if ($res->getStatusCode() === 200) { + return $this->parse($res->getBody()); + } else { + throw new \RuntimeException("{$res->getStatusCode()}: $url"); + } + } + + public function parse(string $json): Metadata + { + $activityOrObject = json_decode($json, true); + $object = $activityOrObject['object'] ?? $activityOrObject; + + $metadata = new Metadata(); + + $metadata->title = isset($object['attributedTo']) ? $this->getTitleFromActor($object['attributedTo']) : ''; + $metadata->description = isset($object['content']) ? $this->html2text($object['content']) : ''; + $metadata->image = $object['attachment'][0]['url'] ?? ''; + + return $metadata; + } + + private function getTitleFromActor(string $url): string + { + $res = $this->activityClient->get($url); + if ($res->getStatusCode() !== 200) { + return ''; + } + + $actor = json_decode($res->getBody(), true); + $title = $actor['name'] ?? ''; + if (isset($actor['preferredUsername'])) { + $title .= ' (@' . $actor['preferredUsername'] . '@' . parse_url($actor['id'], PHP_URL_HOST) . ')'; + } + + return $title; + } + + private function html2text(string $html): string + { + $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); + $html = preg_replace('~|

\s*]*>~i', "\n", $html); + $dom = new \DOMDocument(); + $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + return $dom->textContent; + } +} diff --git a/app/MetadataResolver/MetadataResolver.php b/app/MetadataResolver/MetadataResolver.php index 94f2156..8312af9 100644 --- a/app/MetadataResolver/MetadataResolver.php +++ b/app/MetadataResolver/MetadataResolver.php @@ -18,7 +18,12 @@ class MetadataResolver implements Resolver '~www\.patreon\.com/~' => PatreonResolver::class, '~www\.deviantart\.com/.*/art/.*~' => DeviantArtResolver::class, '~\.syosetu\.com/n\d+[a-z]{2,}~' => NarouResolver::class, - '/.*/' => OGPResolver::class + ]; + + public $mimeTypes = [ + 'application/activity+json' => ActivityPubResolver::class, + 'application/ld+json' => ActivityPubResolver::class, + 'text/html' => OGPResolver::class ]; public function resolve(string $url): Metadata @@ -31,6 +36,27 @@ class MetadataResolver implements Resolver } } - throw new \UnexpectedValueException('URL not matched.'); + $client = new \GuzzleHttp\Client(); + $res = $client->request('GET', $url, [ + 'headers' => [ + 'Accept' => implode(', ', array_keys($this->mimeTypes)) + ] + ]); + + if ($res->getStatusCode() === 200) { + preg_match('/^[^;\s]+/', $res->getHeaderLine('Content-Type'), $matches); + $mimeType = $matches[0]; + + if (isset($this->mimeTypes[$mimeType])) { + $class = $this->mimeTypes[$mimeType]; + $parser = new $class(); + + return $parser->parse($res->getBody()); + } else { + throw new \UnexpectedValueException('URL not matched.'); + } + } else { + throw new \RuntimeException("{$res->getStatusCode()}: $url"); + } } } diff --git a/app/MetadataResolver/OGPResolver.php b/app/MetadataResolver/OGPResolver.php index 5afe83c..3002d91 100644 --- a/app/MetadataResolver/OGPResolver.php +++ b/app/MetadataResolver/OGPResolver.php @@ -2,7 +2,7 @@ namespace App\MetadataResolver; -class OGPResolver implements Resolver +class OGPResolver implements Resolver, Parser { public function resolve(string $url): Metadata { diff --git a/app/MetadataResolver/Parser.php b/app/MetadataResolver/Parser.php new file mode 100644 index 0000000..f9effde --- /dev/null +++ b/app/MetadataResolver/Parser.php @@ -0,0 +1,8 @@ +