2018-06-13 01:00:24 +09:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace App\MetadataResolver;
|
|
|
|
|
|
2019-02-17 02:58:36 +09:00
|
|
|
|
use GuzzleHttp\Client;
|
2018-06-13 01:00:24 +09:00
|
|
|
|
use GuzzleHttp\Cookie\CookieJar;
|
|
|
|
|
|
|
|
|
|
/**
 * Resolves metadata for a Melonbooks product page.
 *
 * The page is fetched with the AUTH_ADULT cookie set, its OGP tags are read
 * through OGPResolver, and the title/description are then rebuilt from the
 * page title pattern and the description blocks found in the HTML.
 */
class MelonbooksResolver implements Resolver
{
    /**
     * Page title layout: "<title>（<circle>）の通販・購入はメロンブックス".
     * Both full-width and ASCII parentheses are accepted; the `u` modifier
     * makes the pattern operate on UTF-8 characters instead of bytes.
     */
    private const TITLE_PATTERN = '~^(.+)[（(](.+)[）)]の通販・購入はメロンブックス$~u';

    /**
     * @var Client
     */
    private $client;

    /**
     * @var OGPResolver
     */
    private $ogpResolver;

    /**
     * @param Client      $client      HTTP client used to fetch the product page.
     * @param OGPResolver $ogpResolver Parser used to read the OGP tags of the fetched HTML.
     */
    public function __construct(Client $client, OGPResolver $ogpResolver)
    {
        $this->client = $client;
        $this->ogpResolver = $ogpResolver;
    }

    /**
     * Fetches the given URL and builds its metadata.
     *
     * @param string $url Product page URL.
     *
     * @return Metadata OGP metadata with the image, title and description adjusted.
     *
     * @throws \RuntimeException When the response status code is not 200.
     */
    public function resolve(string $url): Metadata
    {
        // NOTE(review): presumably this cookie skips the age confirmation page — confirm.
        $cookieJar = CookieJar::fromArray(['AUTH_ADULT' => '1'], 'www.melonbooks.co.jp');

        $res = $this->client->get($url, ['cookies' => $cookieJar]);
        if ($res->getStatusCode() !== 200) {
            throw new \RuntimeException("{$res->getStatusCode()}: $url");
        }

        // Read the body once; it is needed both for OGP parsing and for the DOM queries.
        $html = (string) $res->getBody();
        $metadata = $this->ogpResolver->parse($html);

        $dom = new \DOMDocument();
        // Converting to HTML entities keeps DOMDocument from mis-detecting the encoding;
        // warnings about malformed markup are suppressed on purpose.
        @$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        $xpath = new \DOMXPath($dom);
        $descriptionNodelist = $xpath->query('//div[@id="description"]//p');
        $specialDescriptionNodelist = $xpath->query('//div[@id="special_description"]//p');

        // Remove the censored flag from the image URL.
        if (mb_strpos($metadata->image, '&c=1') !== false) {
            $metadata->image = str_replace('&c=1', '', $metadata->image);
        }

        // Extract the work title and circle name from the page title. When the
        // title does not follow the expected layout, keep the OGP title as-is
        // and leave the circle line out instead of reading undefined offsets.
        $description = '';
        if (preg_match(self::TITLE_PATTERN, (string) $metadata->title, $match) === 1) {
            $metadata->title = $match[1];
            $description = 'サークル: ' . $match[2] . "\n";
        }

        // Special description: first paragraph, plus the second one (separated by
        // a blank line) only when there are exactly two paragraphs.
        if ($specialDescriptionNodelist->length !== 0) {
            $description .= $this->paragraphText($specialDescriptionNodelist->item(0)) . "\n";
            if ($specialDescriptionNodelist->length === 2) {
                $description .= "\n";
                $description .= $this->paragraphText($specialDescriptionNodelist->item(1)) . "\n";
            }
        }

        // Regular description: only the first paragraph is used.
        if ($descriptionNodelist->length !== 0) {
            $description .= $this->paragraphText($descriptionNodelist->item(0));
        }

        $metadata->description = trim($description);

        return $metadata;
    }

    /**
     * Returns the trimmed text of a paragraph node, with any literal "<br>"
     * remaining in the text turned into a line break.
     *
     * @param \DOMNode $node Paragraph node taken from the page.
     *
     * @return string
     */
    private function paragraphText(\DOMNode $node): string
    {
        return trim(str_replace('<br>', "\n", $node->nodeValue));
    }
}
|