2018-04-15 02:05:41 +09:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace App\MetadataResolver;
|
|
|
|
|
2019-02-17 02:58:36 +09:00
|
|
|
use GuzzleHttp\Client;
|
2020-08-08 18:32:32 +09:00
|
|
|
use GuzzleHttp\Cookie\CookieJar;
|
|
|
|
use GuzzleHttp\RequestOptions;
|
2019-02-17 02:58:36 +09:00
|
|
|
|
2019-02-09 04:04:41 +09:00
|
|
|
/**
 * Extracts title, description and image metadata from an HTML page.
 *
 * Open Graph values are preferred, Twitter Card values are the second choice,
 * and plain HTML (<title>, <meta name="description">) is the last resort for
 * the title and description.
 */
class OGPResolver implements Resolver, Parser
{
    /**
     * HTTP client used to download the page to inspect.
     *
     * @var Client
     */
    private $client;

    /**
     * @param Client $client HTTP client used by resolve() to fetch pages.
     */
    public function __construct(Client $client)
    {
        $this->client = $client;
    }

    /**
     * Downloads the page at $url and extracts its metadata.
     *
     * Each call uses a fresh cookie jar, so cookies set during the request
     * (e.g. across redirects) are honoured but never shared between calls.
     *
     * @param string $url Address of the page to inspect.
     *
     * @return Metadata Metadata extracted from the response body.
     */
    public function resolve(string $url): Metadata
    {
        $response = $this->client->get($url, [RequestOptions::COOKIES => new CookieJar()]);

        // getBody() yields a stream object; cast explicitly instead of relying
        // on implicit __toString coercion into the string parameter.
        return $this->parse((string) $response->getBody());
    }

    /**
     * Extracts metadata from an HTML document.
     *
     * Markup that is empty or cannot be parsed yields a Metadata object whose
     * title, description and image are all empty strings.
     *
     * @param string $html Raw markup in any of the supported encodings.
     *
     * @return Metadata Populated metadata object.
     */
    public function parse(string $html): Metadata
    {
        $dom = new \DOMDocument();
        $markup = $this->encodeForDom($html);

        // loadHTML() rejects an empty string (a ValueError on PHP 8), so an
        // empty document is simply left unloaded; the queries below then
        // match nothing.
        if ($markup !== '') {
            // Real-world HTML is rarely valid. Collect libxml's complaints
            // internally rather than emitting them as PHP warnings, then
            // restore the caller's libxml error mode.
            $previous = libxml_use_internal_errors(true);
            try {
                $dom->loadHTML($markup);
            } finally {
                libxml_clear_errors();
                libxml_use_internal_errors($previous);
            }
        }

        $xpath = new \DOMXPath($dom);
        $metadata = new Metadata();

        $metadata->title = $this->findContent($xpath, '//meta[@*="og:title"]', '//meta[@*="twitter:title"]');
        if ($metadata->title === '') {
            // No usable meta title: fall back to the document's <title>.
            $nodes = $xpath->query('//title');
            if ($nodes->length !== 0) {
                $metadata->title = $nodes->item(0)->textContent;
            }
        }

        $metadata->description = $this->findContent(
            $xpath,
            '//meta[@*="og:description"]',
            '//meta[@*="twitter:description"]',
            '//meta[@name="description"]'
        );
        $metadata->image = $this->findContent($xpath, '//meta[@*="og:image"]', '//meta[@*="twitter:image"]');

        return $metadata;
    }

    /**
     * Returns the first non-empty "content" attribute matched by the given
     * XPath expressions.
     *
     * Expressions are tried in the order given, and within one expression the
     * nodes are tried in document order. The `@*` predicates used by parse()
     * match the key in any attribute, so both `property="og:title"` and
     * `name="og:title"` are accepted.
     *
     * @param \DOMXPath $xpath          XPath evaluator bound to the document.
     * @param string    ...$expressions Expressions selecting candidate elements.
     *
     * @return string The content value, or an empty string when nothing matched.
     */
    private function findContent(\DOMXPath $xpath, string ...$expressions): string
    {
        foreach ($expressions as $expression) {
            foreach ($xpath->query($expression) as $node) {
                $content = $node->getAttribute('content');
                // Compare against '' rather than using empty(), which would
                // also discard a legitimate value of "0".
                if ($content !== '') {
                    return $content;
                }
            }
        }

        return '';
    }

    /**
     * Rewrites markup so every non-ASCII character is a numeric character
     * reference, making the parse independent of the page's original charset.
     *
     * This replaces conversion to the 'HTML-ENTITIES' pseudo-encoding, which
     * is deprecated as of PHP 8.2.
     *
     * @param string $html Raw markup.
     *
     * @return string ASCII-only markup, or the input unchanged when its
     *                encoding cannot be detected.
     */
    private function encodeForDom(string $html): string
    {
        if ($html === '') {
            return '';
        }

        $sourceEncoding = mb_detect_encoding($html, 'ASCII,JIS,UTF-8,eucJP-win,SJIS-win');
        if ($sourceEncoding === false) {
            // Best effort: hand the bytes to the parser untouched instead of
            // failing the whole resolution.
            return $html;
        }

        $utf8 = (string) mb_convert_encoding($html, 'UTF-8', $sourceEncoding);

        // Convmap: encode every code point from U+0080 up to U+10FFFF.
        return mb_encode_numericentity($utf8, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    }
}
|