tissue/app/MetadataResolver/MelonbooksResolver.php

69 lines
2.1 KiB
PHP
Raw Normal View History

<?php
namespace App\MetadataResolver;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
/**
 * Resolves metadata for pages on www.melonbooks.co.jp.
 *
 * The base metadata comes from the page's OGP tags (via OGPResolver); the
 * title and description are then rewritten from the OGP title and the
 * description paragraphs found in the page body.
 */
class MelonbooksResolver implements Resolver
{
    /**
     * @var Client HTTP client used to fetch the page.
     */
    private $client;

    /**
     * @var OGPResolver Parser that produces the base Metadata from the page HTML.
     */
    private $ogpResolver;

    /**
     * @param Client      $client      HTTP client used to fetch the page.
     * @param OGPResolver $ogpResolver Parser used to build the base metadata.
     */
    public function __construct(Client $client, OGPResolver $ogpResolver)
    {
        $this->client = $client;
        $this->ogpResolver = $ogpResolver;
    }

    /**
     * Fetch the given page and build its metadata.
     *
     * Exceptions raised by the HTTP client are not caught here.
     *
     * @param string $url URL of the page to resolve.
     *
     * @return Metadata OGP metadata with the title reduced to the work title
     *                  and the description rebuilt from the page body.
     */
    public function resolve(string $url): Metadata
    {
        // Send an AUTH_ADULT=1 cookie scoped to the shop's domain.
        // NOTE(review): presumably this skips an age-confirmation page; confirm against the site.
        $cookieJar = CookieJar::fromArray(['AUTH_ADULT' => '1'], 'www.melonbooks.co.jp');
        $res = $this->client->get($url, ['cookies' => $cookieJar]);

        $metadata = $this->ogpResolver->parse($res->getBody());

        $dom = new \DOMDocument();
        // Markup warnings from the HTML parser are deliberately silenced (best effort).
        @$dom->loadHTML(mb_convert_encoding((string) $res->getBody(), 'HTML-ENTITIES', 'UTF-8'));
        $xpath = new \DOMXPath($dom);
        $descriptionNodelist = $xpath->query('//div[@id="description"]//p');
        $specialDescriptionNodelist = $xpath->query('//div[@id="special_description"]//p');

        // Strip the "censored" flag from the image URL.
        if (mb_strpos($metadata->image, '&c=1') !== false) {
            $metadata->image = str_replace('&c=1', '', $metadata->image);
        }

        // Extract the work title and the circle (maker) name from an OGP title
        // shaped like "<title>(<maker>)" followed by the fixed shop suffix.
        // Both full-width and ASCII parentheses are accepted. When the title does
        // not have that shape, the OGP title is kept and no circle line is written.
        $maker = null;
        $titlePattern = '~^(.+)[((](.+)[))]の通販・購入はメロンブックス$~u';
        if (preg_match($titlePattern, (string) $metadata->title, $match) === 1) {
            $metadata->title = $match[1];
            $maker = $match[2];
        }

        // Assemble the description.
        $description = '';
        if ($maker !== null) {
            $description .= 'サークル: ' . $maker . "\n";
        }

        $specialCount = $specialDescriptionNodelist->length;
        if ($specialCount !== 0) {
            $description .= $this->paragraphText($specialDescriptionNodelist->item(0)) . "\n";
            // The second paragraph is appended only when exactly two were found.
            if ($specialCount === 2) {
                $description .= "\n";
                $description .= $this->paragraphText($specialDescriptionNodelist->item(1)) . "\n";
            }
        }

        if ($descriptionNodelist->length !== 0) {
            $description .= $this->paragraphText($descriptionNodelist->item(0));
        }

        $metadata->description = trim($description);

        return $metadata;
    }

    /**
     * Return the trimmed text of a paragraph node, with literal "<br>" text
     * replaced by newlines.
     *
     * NOTE(review): nodeValue is the node's text content, so <br> elements are
     * not present in it as markup; the replacement only affects "<br>" that
     * appears as text. Kept as in the original behavior.
     *
     * @param \DOMNode $node Paragraph node to read.
     *
     * @return string Trimmed text of the node.
     */
    private function paragraphText(\DOMNode $node): string
    {
        return trim(str_replace('<br>', "\n", (string) $node->nodeValue));
    }
}