Merge pull request #468 from shikorism/feature/per-host-resolve-control
リモートホストごとの同時アクセス制御とメタデータ取得ポリシー制御
This commit is contained in:
commit
301fc83e7e
24
app/ContentProvider.php
Normal file
24
app/ContentProvider.php
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App;
|
||||||
|
|
||||||
|
use Illuminate\Database\Eloquent\Model;
|
||||||
|
|
||||||
|
/**
 * Eloquent model holding the access policy data kept per remote host.
 *
 * Rows are identified by the `host` string rather than by an auto-increment id.
 */
class ContentProvider extends Model
{
    /**
     * Name of the primary key column.
     *
     * @var string
     */
    protected $primaryKey = 'host';

    /**
     * The primary key is a string, not an integer.
     *
     * @var string
     */
    protected $keyType = 'string';

    /**
     * The primary key is assigned by the application, never auto-incremented.
     *
     * @var bool
     */
    public $incrementing = false;

    /**
     * Attributes that may be mass-assigned.
     *
     * @var array
     */
    protected $fillable = [
        'host',
        'robots',
        'robots_cached_at',
    ];

    /**
     * Attributes that Eloquent should treat as dates.
     *
     * @var array
     */
    protected $dates = [
        'created_at',
        'updated_at',
        'robots_cached_at',
    ];
}
|
30
app/MetadataResolver/DisallowedByProviderException.php
Normal file
30
app/MetadataResolver/DisallowedByProviderException.php
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\MetadataResolver;
|
||||||
|
|
||||||
|
use RuntimeException;
|
||||||
|
use Throwable;
|
||||||
|
|
||||||
|
/**
 * Thrown when crawling a URL is refused by the robots.txt served by its ContentProvider.
 */
class DisallowedByProviderException extends RuntimeException
{
    /** @var string URL whose retrieval was refused */
    private $url;

    /**
     * @param string $url URL that robots.txt disallows
     * @param Throwable|null $previous underlying cause, if any
     */
    public function __construct(string $url, ?Throwable $previous = null)
    {
        parent::__construct("Access denied by robots.txt: $url", 0, $previous);
        $this->url = $url;
    }

    /**
     * @return string the URL passed to the constructor
     */
    public function getUrl(): string
    {
        return $this->url;
    }

    /**
     * Returns the host component of the refused URL.
     *
     * @return string the host, or an empty string when the URL has no parsable host
     */
    public function getHost(): string
    {
        // parse_url() gives null when the component is absent and false for malformed URLs;
        // neither satisfies the declared string return type, so map both to ''.
        $host = parse_url($this->url, PHP_URL_HOST);

        return is_string($host) ? $host : '';
    }
}
|
@ -2,14 +2,20 @@
|
|||||||
|
|
||||||
namespace App\Services;
|
namespace App\Services;
|
||||||
|
|
||||||
|
use App\ContentProvider;
|
||||||
use App\Metadata;
|
use App\Metadata;
|
||||||
use App\MetadataResolver\DeniedHostException;
|
use App\MetadataResolver\DeniedHostException;
|
||||||
|
use App\MetadataResolver\DisallowedByProviderException;
|
||||||
use App\MetadataResolver\MetadataResolver;
|
use App\MetadataResolver\MetadataResolver;
|
||||||
use App\MetadataResolver\ResolverCircuitBreakException;
|
use App\MetadataResolver\ResolverCircuitBreakException;
|
||||||
use App\MetadataResolver\UncaughtResolverException;
|
use App\MetadataResolver\UncaughtResolverException;
|
||||||
use App\Tag;
|
use App\Tag;
|
||||||
use App\Utilities\Formatter;
|
use App\Utilities\Formatter;
|
||||||
|
use Carbon\Carbon;
|
||||||
|
use Carbon\CarbonInterface;
|
||||||
|
use GuzzleHttp\Client;
|
||||||
use Illuminate\Support\Facades\DB;
|
use Illuminate\Support\Facades\DB;
|
||||||
|
use Illuminate\Support\Facades\Log;
|
||||||
|
|
||||||
class MetadataResolveService
|
class MetadataResolveService
|
||||||
{
|
{
|
||||||
@ -44,48 +50,242 @@ class MetadataResolveService
|
|||||||
throw new DeniedHostException($url);
|
throw new DeniedHostException($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$metadata = Metadata::find($url);
|
||||||
|
|
||||||
|
// 無かったら取得
|
||||||
|
// TODO: ある程度古かったら再取得とかありだと思う
|
||||||
|
if ($metadata == null || $metadata->needRefresh()) {
|
||||||
|
$hostWithPort = $this->getHostWithPortFromUrl($url);
|
||||||
|
$metadata = $this->hostLock($hostWithPort, function (?CarbonInterface $lastAccess) use ($url) {
|
||||||
|
// HostLockの解放待ちをしている間に、他のプロセスで取得完了しているかもしれない
|
||||||
|
$metadata = Metadata::find($url);
|
||||||
|
if ($metadata !== null && !$metadata->needRefresh()) {
|
||||||
|
return $metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->checkProviderPolicy($url, $lastAccess);
|
||||||
|
|
||||||
|
return $this->resolve($url, $metadata);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return $metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the host of a URL, followed by ":port" when the URL carries an explicit port.
 *
 * @param string $url absolute URL
 * @return string "host" or "host:port"
 * @throws \InvalidArgumentException if no host can be extracted from $url
 */
private function getHostWithPortFromUrl(string $url): string
{
    $parts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and omits 'host' for relative ones.
    if ($parts === false || !isset($parts['host'])) {
        throw new \InvalidArgumentException("Can't extract host from URL: $url");
    }

    return isset($parts['port'])
        ? $parts['host'] . ':' . $parts['port']
        : $parts['host'];
}
|
||||||
|
|
||||||
|
/**
 * Runs $fn while holding an exclusive flock() lock on a per-host lock file.
 *
 * The lock file lives in storage/content_providers_lock and is named after $host. It also stores a
 * JSON object {"time": <ISO 8601 timestamp>} that is rewritten after $fn returns normally; the
 * timestamp found on entry is handed to $fn as the previous access time.
 *
 * @param string $host lock key, used as the lock file name
 * @param callable $fn callback invoked with a Carbon instance (previous access time) or null when none is recorded
 * @return mixed return of $fn
 * @throws \RuntimeException when the lock directory or file cannot be created, locked, written, unlocked or closed
 */
private function hostLock(string $host, callable $fn)
{
    $lockDir = storage_path('content_providers_lock');
    // Another process may create the directory between the check and mkdir(); the error is
    // suppressed and the directory re-checked so that losing that race is not treated as a failure.
    if (!is_dir($lockDir) && !@mkdir($lockDir) && !is_dir($lockDir)) {
        throw new \RuntimeException("Lock failed! Can't create lock directory.");
    }

    $lockFile = $lockDir . DIRECTORY_SEPARATOR . $host;
    $fp = fopen($lockFile, 'c+b');
    if ($fp === false) {
        throw new \RuntimeException("Lock failed! Can't open lock file.");
    }

    try {
        if (!flock($fp, LOCK_EX)) {
            throw new \RuntimeException("Lock failed! Can't lock file.");
        }

        try {
            // Read the previous access time, if the file holds one.
            $lastAccess = null;
            $accessInfoText = stream_get_contents($fp);
            if ($accessInfoText !== false) {
                $accessInfo = json_decode($accessInfoText, true);
                if (is_array($accessInfo) && isset($accessInfo['time'])) {
                    try {
                        $lastAccess = new Carbon($accessInfo['time']);
                    } catch (\Exception $e) {
                        // An unreadable timestamp is treated as "no record". The file is only
                        // rewritten after $fn succeeds, so failing here would never heal.
                        $lastAccess = null;
                    }
                }
            }

            $result = $fn($lastAccess);

            // Replace the stored record: truncate so no stale bytes survive a shorter payload,
            // and flush before the lock is released so the next holder reads the new value.
            $payload = json_encode(['time' => now()->toIso8601String()]);
            if (
                $payload === false
                || !rewind($fp)
                || !ftruncate($fp, 0)
                || fwrite($fp, $payload) === false
                || !fflush($fp)
            ) {
                throw new \RuntimeException("I/O Error! Can't write to lock file.");
            }

            return $result;
        } finally {
            if (!flock($fp, LOCK_UN)) {
                throw new \RuntimeException("Unlock failed! Can't unlock file.");
            }
        }
    } finally {
        if (!fclose($fp)) {
            throw new \RuntimeException("Unlock failed! Can't close lock file.");
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Downloads /robots.txt from the host (same scheme and port) of the given metadata URL.
 *
 * Any \Exception raised by the HTTP request is logged and swallowed.
 *
 * @param string $url metadata URL whose host should be queried
 * @return string|null response body, or null when the request failed
 */
private function fetchRobotsTxt(string $url): ?string
{
    $source = parse_url($url);

    $components = [
        'scheme' => $source['scheme'],
        'host' => $source['host'],
        'port' => $source['port'] ?? null,
        'path' => '/robots.txt'
    ];
    $robotsUrl = http_build_url($components);

    $http = app(Client::class);

    try {
        $response = $http->get($robotsUrl);
    } catch (\Exception $e) {
        Log::error("robots.txtの取得に失敗: {$e}");

        return null;
    }

    return (string) $response->getBody();
}
|
||||||
|
|
||||||
|
/**
 * Checks a URL against the ContentProvider policy stored for its host; throws when access is not allowed.
 *
 * Within one DB transaction this method:
 *  - loads the ContentProvider row for the host (with a shared lock), creating it with a freshly
 *    downloaded robots.txt when it does not exist yet;
 *  - rejects hosts flagged as is_blocked;
 *  - sleeps until access_interval_sec seconds have passed since $lastAccess;
 *  - re-downloads robots.txt when the cached copy is 7 or more days old;
 *  - raises access_interval_sec to the delay requested by robots.txt when that delay is at least as large;
 *  - rejects paths that robots.txt disallows for the "TissueBot" user agent.
 *
 * ContentProvider changes are committed even when the URL ends up being rejected.
 *
 * @param string $url URL whose metadata is about to be fetched
 * @param CarbonInterface|null $lastAccess time of the last access to the host, when one is recorded
 * @throws DeniedHostException when the host's ContentProvider is flagged as blocked
 * @throws DisallowedByProviderException when the host's robots.txt disallows the URL
 */
private function checkProviderPolicy(string $url, ?CarbonInterface $lastAccess): void
{
    DB::beginTransaction();
    try {
        $hostWithPort = $this->getHostWithPortFromUrl($url);
        $contentProvider = ContentProvider::sharedLock()->find($hostWithPort);
        if ($contentProvider === null) {
            $contentProvider = ContentProvider::create([
                'host' => $hostWithPort,
                'robots' => $this->fetchRobotsTxt($url),
                'robots_cached_at' => now(),
            ]);
            // create() only populates the attributes passed above. Re-read the row so the column
            // defaults (is_blocked, access_interval_sec) are present for the checks below;
            // otherwise they are null and the interval comparisons are meaningless.
            $contentProvider->refresh();
        }

        if ($contentProvider->is_blocked) {
            throw new DeniedHostException($url);
        }

        // Throttle consecutive accesses to the same host.
        if ($lastAccess !== null) {
            $elapsedSeconds = $lastAccess->diffInSeconds(now(), false);
            if ($elapsedSeconds < $contentProvider->access_interval_sec) {
                // A negative elapsed time (last access recorded in the future) lengthens the wait
                // by the same amount, so a single expression covers both cases.
                sleep($contentProvider->access_interval_sec - $elapsedSeconds);
            }
        }

        // Refresh the cached robots.txt once it is a week old.
        if ($contentProvider->robots_cached_at->diffInDays(now()) >= 7) {
            $contentProvider->update([
                'robots' => $this->fetchRobotsTxt($url),
                'robots_cached_at' => now(),
            ]);
        }

        // Evaluate robots.txt. The stored body is null when the download failed; parse that as empty.
        $robotsParser = new \RobotsTxtParser($contentProvider->robots ?? '');
        $robotsParser->setUserAgent('TissueBot');
        $robotsDelay = $robotsParser->getDelay();
        if ($robotsDelay !== 0 && $robotsDelay >= $contentProvider->access_interval_sec) {
            $contentProvider->access_interval_sec = (int) $robotsDelay;
            $contentProvider->save();
        }
        if ($robotsParser->isDisallowed(parse_url($url, PHP_URL_PATH))) {
            throw new DisallowedByProviderException($url);
        }

        DB::commit();
    } catch (DeniedHostException | DisallowedByProviderException $e) {
        // Commit anyway so that the ContentProvider updates made above are kept.
        DB::commit();
        throw $e;
    } catch (\Exception $e) {
        DB::rollBack();
        throw $e;
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* メタデータをリモートサーバに問い合わせて取得します。
|
||||||
|
* @param string $url メタデータを取得したいURL
|
||||||
|
* @param Metadata|null $metadata キャッシュ済のメタデータ (存在する場合)
|
||||||
|
* @return Metadata 取得できたメタデータ
|
||||||
|
* @throws UncaughtResolverException Resolver内で例外が発生して取得できなかった場合にスロー
|
||||||
|
* @throws ResolverCircuitBreakException 規定回数以上の解決失敗により、メタデータの取得が不能となっている場合にスロー
|
||||||
|
*/
|
||||||
|
private function resolve(string $url, ?Metadata $metadata): Metadata
|
||||||
|
{
|
||||||
|
DB::beginTransaction();
|
||||||
|
try {
|
||||||
|
if ($metadata === null) {
|
||||||
|
$metadata = new Metadata(['url' => $url]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($metadata->error_count >= self::CIRCUIT_BREAK_COUNT) {
|
||||||
|
throw new ResolverCircuitBreakException($metadata->error_count, $url);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
$resolved = $this->resolver->resolve($url);
|
||||||
|
} catch (\Exception $e) {
|
||||||
|
$metadata->storeException(now(), $e);
|
||||||
|
$metadata->save();
|
||||||
|
throw new UncaughtResolverException(implode(': ', [
|
||||||
|
$metadata->error_count . '回目のメタデータ取得失敗', get_class($e), $e->getMessage()
|
||||||
|
]), 0, $e);
|
||||||
|
}
|
||||||
|
|
||||||
|
$metadata->fill([
|
||||||
|
'title' => $resolved->title,
|
||||||
|
'description' => $resolved->description,
|
||||||
|
'image' => $resolved->image,
|
||||||
|
'expires_at' => $resolved->expires_at
|
||||||
|
]);
|
||||||
|
$metadata->clearError();
|
||||||
|
$metadata->save();
|
||||||
|
|
||||||
|
$tagIds = [];
|
||||||
|
foreach ($resolved->normalizedTags() as $tagName) {
|
||||||
|
$tag = Tag::firstOrCreate(['name' => $tagName]);
|
||||||
|
$tagIds[] = $tag->id;
|
||||||
|
}
|
||||||
|
$metadata->tags()->sync($tagIds);
|
||||||
|
|
||||||
DB::commit();
|
DB::commit();
|
||||||
|
|
||||||
return $metadata;
|
return $metadata;
|
||||||
|
@ -33,7 +33,8 @@
|
|||||||
"sentry/sentry-laravel": "1.8.0",
|
"sentry/sentry-laravel": "1.8.0",
|
||||||
"staudenmeir/eloquent-eager-limit": "^1.0",
|
"staudenmeir/eloquent-eager-limit": "^1.0",
|
||||||
"symfony/css-selector": "^4.3",
|
"symfony/css-selector": "^4.3",
|
||||||
"symfony/dom-crawler": "^4.3"
|
"symfony/dom-crawler": "^4.3",
|
||||||
|
"t1gor/robots-txt-parser": "^0.2.4"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"barryvdh/laravel-debugbar": "^3.1",
|
"barryvdh/laravel-debugbar": "^3.1",
|
||||||
|
145
composer.lock
generated
145
composer.lock
generated
@ -4,7 +4,7 @@
|
|||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "1bba68b609be6a0dcdaf05d72e8eb759",
|
"content-hash": "bbb184ff943ae3a938a8370d94b6afb2",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "anhskohbo/no-captcha",
|
"name": "anhskohbo/no-captcha",
|
||||||
@ -547,20 +547,6 @@
|
|||||||
"uppercase",
|
"uppercase",
|
||||||
"words"
|
"words"
|
||||||
],
|
],
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"url": "https://www.doctrine-project.org/sponsorship.html",
|
|
||||||
"type": "custom"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://www.patreon.com/phpdoctrine",
|
|
||||||
"type": "patreon"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://tidelift.com/funding/github/packagist/doctrine%2Finflector",
|
|
||||||
"type": "tidelift"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"time": "2020-05-29T15:13:26+00:00"
|
"time": "2020-05-29T15:13:26+00:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -623,20 +609,6 @@
|
|||||||
"parser",
|
"parser",
|
||||||
"php"
|
"php"
|
||||||
],
|
],
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"url": "https://www.doctrine-project.org/sponsorship.html",
|
|
||||||
"type": "custom"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://www.patreon.com/phpdoctrine",
|
|
||||||
"type": "patreon"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://tidelift.com/funding/github/packagist/doctrine%2Flexer",
|
|
||||||
"type": "tidelift"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"time": "2020-05-25T17:44:05+00:00"
|
"time": "2020-05-25T17:44:05+00:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -5649,6 +5621,63 @@
|
|||||||
],
|
],
|
||||||
"time": "2020-05-30T20:06:45+00:00"
|
"time": "2020-05-30T20:06:45+00:00"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "t1gor/robots-txt-parser",
|
||||||
|
"version": "v0.2.4",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/t1gor/Robots.txt-Parser-Class.git",
|
||||||
|
"reference": "7ff08da5625fb4f72d17b1528c60aadb184e9e68"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/t1gor/Robots.txt-Parser-Class/zipball/7ff08da5625fb4f72d17b1528c60aadb184e9e68",
|
||||||
|
"reference": "7ff08da5625fb4f72d17b1528c60aadb184e9e68",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"ext-mbstring": "*",
|
||||||
|
"php": ">=5.5.0",
|
||||||
|
"vipnytt/useragentparser": "^1.0"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"codeclimate/php-test-reporter": ">=0.2",
|
||||||
|
"phpunit/phpunit": "~3.7"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"classmap": [
|
||||||
|
"source/robotstxtparser.php"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Igor Timoshenkov",
|
||||||
|
"email": "igor.timoshenkov@gmail.com",
|
||||||
|
"role": "creator"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Jan-Petter Gundersen",
|
||||||
|
"email": "jpg@vipnytt.no",
|
||||||
|
"role": "contributor"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "PHP class to parse robots.txt rules according to Google, Yandex, W3C and The Web Robots Pages specifications.",
|
||||||
|
"homepage": "https://github.com/t1gor/Robots.txt-Parser-Class",
|
||||||
|
"keywords": [
|
||||||
|
"The Web Robots Pages",
|
||||||
|
"W3C",
|
||||||
|
"google",
|
||||||
|
"parser",
|
||||||
|
"robots.txt",
|
||||||
|
"yandex"
|
||||||
|
],
|
||||||
|
"time": "2018-07-21T20:01:19+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "tijsverkoyen/css-to-inline-styles",
|
"name": "tijsverkoyen/css-to-inline-styles",
|
||||||
"version": "2.2.3",
|
"version": "2.2.3",
|
||||||
@ -5698,6 +5727,64 @@
|
|||||||
"homepage": "https://github.com/tijsverkoyen/CssToInlineStyles",
|
"homepage": "https://github.com/tijsverkoyen/CssToInlineStyles",
|
||||||
"time": "2020-07-13T06:12:54+00:00"
|
"time": "2020-07-13T06:12:54+00:00"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "vipnytt/useragentparser",
|
||||||
|
"version": "v1.0.4",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/VIPnytt/UserAgentParser.git",
|
||||||
|
"reference": "c5a6718a57088e0d45c2e36f09efabc4e008bd8c"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/VIPnytt/UserAgentParser/zipball/c5a6718a57088e0d45c2e36f09efabc4e008bd8c",
|
||||||
|
"reference": "c5a6718a57088e0d45c2e36f09efabc4e008bd8c",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"php": "^5.5 || ^7.0"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"vipnytt\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "VIP nytt AS",
|
||||||
|
"email": "support@vipnytt.no",
|
||||||
|
"role": "Owner"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Jan-Petter Gundersen",
|
||||||
|
"email": "jpg@vipnytt.no",
|
||||||
|
"role": "Developer"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "User-Agent parser for robot rule sets",
|
||||||
|
"homepage": "https://github.com/VIPnytt/UserAgentParser",
|
||||||
|
"keywords": [
|
||||||
|
"REP",
|
||||||
|
"Robots Exclusion Protocol",
|
||||||
|
"Robots meta tag",
|
||||||
|
"crawler",
|
||||||
|
"robot",
|
||||||
|
"robots.txt",
|
||||||
|
"spider",
|
||||||
|
"user-agent",
|
||||||
|
"useragent",
|
||||||
|
"x-robots-tag"
|
||||||
|
],
|
||||||
|
"time": "2017-12-17T14:23:27+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "vlucas/phpdotenv",
|
"name": "vlucas/phpdotenv",
|
||||||
"version": "v3.6.7",
|
"version": "v3.6.7",
|
||||||
|
14
database/factories/ContentProviderFactory.php
Normal file
14
database/factories/ContentProviderFactory.php
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/** @var \Illuminate\Database\Eloquent\Factory $factory */
|
||||||
|
|
||||||
|
use App\ContentProvider;
|
||||||
|
use Faker\Generator as Faker;
|
||||||
|
|
||||||
|
// Default ContentProvider fixture: host "example.com", no robots.txt body, cache timestamp set to
// the current time.
$factory->define(ContentProvider::class, function (Faker $faker) {
    $definition = [];
    $definition['host'] = 'example.com';
    $definition['robots'] = null;
    $definition['robots_cached_at'] = now();

    return $definition;
});
|
@ -0,0 +1,37 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
use Illuminate\Database\Migrations\Migration;
|
||||||
|
use Illuminate\Database\Schema\Blueprint;
|
||||||
|
use Illuminate\Support\Facades\Schema;
|
||||||
|
|
||||||
|
/**
 * Migration for the content_providers table, keyed by host.
 */
class CreateContentProvidersTable extends Migration
{
    /** Name of the table managed by this migration. */
    private const TABLE = 'content_providers';

    /**
     * Create the table.
     *
     * Columns: host (primary key), nullable robots text, robots_cached_at timestamp,
     * is_blocked (default false), access_interval_sec (default 5) and the standard timestamps.
     *
     * @return void
     */
    public function up()
    {
        Schema::create(self::TABLE, function (Blueprint $table) {
            $table->string('host');
            $table->text('robots')->nullable();
            $table->timestamp('robots_cached_at');
            $table->boolean('is_blocked')->default(false);
            $table->integer('access_interval_sec')->default(5);
            $table->timestamps();

            $table->primary('host');
        });
    }

    /**
     * Drop the table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists(self::TABLE);
    }
}
|
2
storage/content_providers_lock/.gitignore
vendored
Normal file
2
storage/content_providers_lock/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
namespace Tests\Unit\Services;
|
namespace Tests\Unit\Services;
|
||||||
|
|
||||||
|
use App\ContentProvider;
|
||||||
use App\MetadataResolver\MetadataResolver;
|
use App\MetadataResolver\MetadataResolver;
|
||||||
use App\MetadataResolver\ResolverCircuitBreakException;
|
use App\MetadataResolver\ResolverCircuitBreakException;
|
||||||
use App\MetadataResolver\UncaughtResolverException;
|
use App\MetadataResolver\UncaughtResolverException;
|
||||||
@ -26,6 +27,8 @@ class MetadataResolverServiceTest extends TestCase
|
|||||||
parent::setUp();
|
parent::setUp();
|
||||||
$this->seed();
|
$this->seed();
|
||||||
Carbon::setTestNow('2020-07-21 19:19:19');
|
Carbon::setTestNow('2020-07-21 19:19:19');
|
||||||
|
// FIXME: 今書かれてるテストはresolveのHTTPリクエストのみを考慮しているので、ContentProviderにデータがないとリクエスト回数がずれる
|
||||||
|
factory(ContentProvider::class)->create();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function tearDown(): void
|
protected function tearDown(): void
|
||||||
|
Loading…
Reference in New Issue
Block a user