Merge pull request #468 from shikorism/feature/per-host-resolve-control
リモートホストごとの同時アクセス制御とメタデータ取得ポリシー制御
This commit is contained in:
		
							
								
								
									
										24
									
								
								app/ContentProvider.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								app/ContentProvider.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,24 @@
 | 
				
			|||||||
 | 
					<?php
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace App;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use Illuminate\Database\Eloquent\Model;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Eloquent model for a remote content host, keyed by its host string.
					 */
					class ContentProvider extends Model
					{
					    /** @var string Name of the primary key column (the host, not a numeric id). */
					    protected $primaryKey = 'host';

					    /** @var string The primary key holds a string value. */
					    protected $keyType = 'string';

					    /** @var bool The primary key is not auto-incrementing. */
					    public $incrementing = false;

					    /** @var array Attributes that may be mass-assigned. */
					    protected $fillable = [
					        'host',
					        'robots',
					        'robots_cached_at',
					    ];

					    /** @var array Attributes listed as dates for Eloquent. */
					    protected $dates = [
					        'created_at',
					        'updated_at',
					        'robots_cached_at',
					    ];
					}
 | 
				
			||||||
							
								
								
									
										30
									
								
								app/MetadataResolver/DisallowedByProviderException.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								app/MetadataResolver/DisallowedByProviderException.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,30 @@
 | 
				
			|||||||
 | 
					<?php
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace App\MetadataResolver;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use RuntimeException;
 | 
				
			||||||
 | 
					use Throwable;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Thrown when crawling a URL is refused by the robots.txt served by its ContentProvider.
					 */
					class DisallowedByProviderException extends RuntimeException
					{
					    /** @var string URL whose retrieval was refused */
					    private $url;

					    /**
					     * @param string $url URL that was refused; embedded in the exception message
					     * @param Throwable|null $previous underlying cause, if any
					     */
					    public function __construct(string $url, ?Throwable $previous = null)
					    {
					        parent::__construct("Access denied by robots.txt: $url", 0, $previous);
					        $this->url = $url;
					    }

					    /**
					     * @return string the URL passed to the constructor
					     */
					    public function getUrl(): string
					    {
					        return $this->url;
					    }

					    /**
					     * @return string host component of the URL, or an empty string when the URL has none
					     */
					    public function getHost(): string
					    {
					        $host = parse_url($this->url, PHP_URL_HOST);

					        // parse_url() yields null when the URL has no host and false when it is
					        // seriously malformed; returning either would violate the string return type.
					        return is_string($host) ? $host : '';
					    }
					}
 | 
				
			||||||
@@ -2,14 +2,20 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace App\Services;
 | 
					namespace App\Services;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use App\ContentProvider;
 | 
				
			||||||
use App\Metadata;
 | 
					use App\Metadata;
 | 
				
			||||||
use App\MetadataResolver\DeniedHostException;
 | 
					use App\MetadataResolver\DeniedHostException;
 | 
				
			||||||
 | 
					use App\MetadataResolver\DisallowedByProviderException;
 | 
				
			||||||
use App\MetadataResolver\MetadataResolver;
 | 
					use App\MetadataResolver\MetadataResolver;
 | 
				
			||||||
use App\MetadataResolver\ResolverCircuitBreakException;
 | 
					use App\MetadataResolver\ResolverCircuitBreakException;
 | 
				
			||||||
use App\MetadataResolver\UncaughtResolverException;
 | 
					use App\MetadataResolver\UncaughtResolverException;
 | 
				
			||||||
use App\Tag;
 | 
					use App\Tag;
 | 
				
			||||||
use App\Utilities\Formatter;
 | 
					use App\Utilities\Formatter;
 | 
				
			||||||
 | 
					use Carbon\Carbon;
 | 
				
			||||||
 | 
					use Carbon\CarbonInterface;
 | 
				
			||||||
 | 
					use GuzzleHttp\Client;
 | 
				
			||||||
use Illuminate\Support\Facades\DB;
 | 
					use Illuminate\Support\Facades\DB;
 | 
				
			||||||
 | 
					use Illuminate\Support\Facades\Log;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class MetadataResolveService
 | 
					class MetadataResolveService
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -44,13 +50,208 @@ class MetadataResolveService
 | 
				
			|||||||
            throw new DeniedHostException($url);
 | 
					            throw new DeniedHostException($url);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        DB::beginTransaction();
 | 
					 | 
				
			||||||
        try {
 | 
					 | 
				
			||||||
        $metadata = Metadata::find($url);
 | 
					        $metadata = Metadata::find($url);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // 無かったら取得
 | 
					        // 無かったら取得
 | 
				
			||||||
        // TODO: ある程度古かったら再取得とかありだと思う
 | 
					        // TODO: ある程度古かったら再取得とかありだと思う
 | 
				
			||||||
        if ($metadata == null || $metadata->needRefresh()) {
 | 
					        if ($metadata == null || $metadata->needRefresh()) {
 | 
				
			||||||
 | 
					            $hostWithPort = $this->getHostWithPortFromUrl($url);
 | 
				
			||||||
 | 
					            $metadata = $this->hostLock($hostWithPort, function (?CarbonInterface $lastAccess) use ($url) {
 | 
				
			||||||
 | 
					                // HostLockの解放待ちをしている間に、他のプロセスで取得完了しているかもしれない
 | 
				
			||||||
 | 
					                $metadata = Metadata::find($url);
 | 
				
			||||||
 | 
					                if ($metadata !== null && !$metadata->needRefresh()) {
 | 
				
			||||||
 | 
					                    return $metadata;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                $this->checkProviderPolicy($url, $lastAccess);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                return $this->resolve($url, $metadata);
 | 
				
			||||||
 | 
					            });
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return $metadata;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
					     * Extract the host part of a URL, followed by ":port" when the URL carries an explicit port.
					     *
					     * @param string $url absolute URL
					     * @return string for example "example.com" or "example.com:8080"
					     * @throws \InvalidArgumentException when no host can be extracted from $url
					     */
					    private function getHostWithPortFromUrl(string $url): string
					    {
					        $parts = parse_url($url);

					        // parse_url() returns false for seriously malformed URLs and omits 'host' for
					        // relative ones. Fail with a clear message instead of an undefined-index
					        // notice followed by a TypeError on the string return type.
					        if (!is_array($parts) || !isset($parts['host']) || $parts['host'] === '') {
					            throw new \InvalidArgumentException("Can't extract host from URL: $url");
					        }

					        $host = $parts['host'];
					        if (isset($parts['port'])) {
					            $host .= ':' . $parts['port'];
					        }

					        return $host;
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
					     * Run $fn while holding an exclusive, per-host file lock.
					     *
					     * The lock file lives under storage_path('content_providers_lock') and is named after
					     * $host. It also stores, as JSON, the time at which the previous call finished
					     * successfully; that time is handed to $fn and rewritten once $fn returns normally.
					     * When $fn throws, the stored time is left untouched.
					     *
					     * @param string $host host (optionally with ":port"); used verbatim as the lock file name
					     * @param callable $fn invoked with the previously stored time as a Carbon instance, or null when none is stored
					     * @return mixed return of $fn
					     * @throws \RuntimeException when the lock directory or file cannot be created, opened, locked, written, unlocked or closed
					     */
					    private function hostLock(string $host, callable $fn)
					    {
					        $lockDir = storage_path('content_providers_lock');
					        // Another process may create the directory between our check and mkdir().
					        // The warning mkdir() emits in that case is suppressed (an error handler could
					        // otherwise turn it into an exception) and is_dir() is re-checked afterwards.
					        if (!is_dir($lockDir) && !@mkdir($lockDir) && !is_dir($lockDir)) {
					            throw new \RuntimeException("Lock failed! Can't create lock directory.");
					        }

					        $lockFile = $lockDir . DIRECTORY_SEPARATOR . $host;
					        // 'c+' opens for read/write, creating the file if needed, without truncating it.
					        $fp = fopen($lockFile, 'c+b');
					        if ($fp === false) {
					            throw new \RuntimeException("Lock failed! Can't open lock file.");
					        }

					        try {
					            if (!flock($fp, LOCK_EX)) {
					                throw new \RuntimeException("Lock failed! Can't lock file.");
					            }

					            try {
					                $accessInfo = null;
					                $accessInfoText = stream_get_contents($fp);
					                if ($accessInfoText !== false) {
					                    $accessInfo = json_decode($accessInfoText, true);
					                }

					                $result = $fn(isset($accessInfo['time']) ? new Carbon($accessInfo['time']) : null);

					                $accessInfo = [
					                    'time' => now()->toIso8601String()
					                ];
					                // Empty the file before writing so that a shorter payload can never leave
					                // trailing bytes behind (which would make the stored JSON unparsable), and
					                // flush before the lock is released so the next holder reads the new value.
					                if (
					                    !ftruncate($fp, 0)
					                    || !rewind($fp)
					                    || fwrite($fp, json_encode($accessInfo)) === false
					                    || !fflush($fp)
					                ) {
					                    throw new \RuntimeException("I/O Error! Can't write to lock file.");
					                }

					                return $result;
					            } finally {
					                if (!flock($fp, LOCK_UN)) {
					                    throw new \RuntimeException("Unlock failed! Can't unlock file.");
					                }
					            }
					        } finally {
					            if (!fclose($fp)) {
					                throw new \RuntimeException("Unlock failed! Can't close lock file.");
					            }
					        }
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
					     * Download /robots.txt from the scheme, host and port of the given metadata URL.
					     *
					     * @param string $url metadata URL whose origin is queried
					     * @return string|null body of robots.txt, or null when the request raised an exception (the failure is logged)
					     */
					    private function fetchRobotsTxt(string $url): ?string
					    {
					        $components = parse_url($url);
					        $robotsUrlParts = [
					            'scheme' => $components['scheme'],
					            'host' => $components['host'],
					            'port' => isset($components['port']) ? $components['port'] : null,
					            'path' => '/robots.txt'
					        ];
					        $robotsUrl = http_build_url($robotsUrlParts);

					        $httpClient = app(Client::class);
					        try {
					            $response = $httpClient->get($robotsUrl);
					            $body = (string) $response->getBody();

					            return $body;
					        } catch (\Exception $e) {
					            // Best effort: a failed download is logged and reported as "no robots.txt".
					            Log::error("robots.txtの取得に失敗: {$e}");

					            return null;
					        }
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
					     * Check a URL against the stored ContentProvider policy; throw when access is not permitted.
					     *
					     * Steps, all inside one DB transaction:
					     *  - load the ContentProvider row for the host, creating it (with a freshly fetched robots.txt) if absent;
					     *  - reject hosts flagged as blocked;
					     *  - sleep as long as needed to respect the host's access interval since $lastAccess;
					     *  - re-fetch robots.txt when the cached copy is 7 or more days old;
					     *  - evaluate robots.txt for the "TissueBot" user agent (crawl delay and disallow rules).
					     *
					     * @param string $url URL whose metadata is wanted
					     * @param CarbonInterface|null $lastAccess time of the last recorded access to the host, if any
					     * @throws DeniedHostException when the host is flagged as blocked
					     * @throws DisallowedByProviderException when the host's robots.txt disallows the URL
					     */
					    private function checkProviderPolicy(string $url, ?CarbonInterface $lastAccess): void
					    {
					        DB::beginTransaction();
					        try {
					            $hostWithPort = $this->getHostWithPortFromUrl($url);
					            $contentProvider = ContentProvider::sharedLock()->find($hostWithPort);
					            if ($contentProvider === null) {
					                $contentProvider = ContentProvider::create([
					                    'host' => $hostWithPort,
					                    'robots' => $this->fetchRobotsTxt($url),
					                    'robots_cached_at' => now(),
					                ]);
					            }

					            if ($contentProvider->is_blocked) {
					                throw new DeniedHostException($url);
					            }

					            // Limit consecutive accesses to the same host.
					            if ($lastAccess !== null) {
					                $elapsedSeconds = $lastAccess->diffInSeconds(now(), false);
					                if ($elapsedSeconds < $contentProvider->access_interval_sec) {
					                    if ($elapsedSeconds < 0) {
					                        // The recorded access lies in the future: wait until that
					                        // moment and then one full interval on top of it.
					                        $wait = abs($elapsedSeconds) + $contentProvider->access_interval_sec;
					                    } else {
					                        $wait = $contentProvider->access_interval_sec - $elapsedSeconds;
					                    }
					                    sleep($wait);
					                }
					            }

					            // Refresh the cached robots.txt once it is a week old.
					            if ($contentProvider->robots_cached_at->diffInDays(now()) >= 7) {
					                $contentProvider->update([
					                    'robots' => $this->fetchRobotsTxt($url),
					                    'robots_cached_at' => now(),
					                ]);
					            }

					            // Evaluate robots.txt.
					            $robotsParser = new \RobotsTxtParser($contentProvider->robots);
					            $robotsParser->setUserAgent('TissueBot');
					            $robotsDelay = $robotsParser->getDelay();
					            if ($robotsDelay !== 0 && $robotsDelay >= $contentProvider->access_interval_sec) {
					                $contentProvider->access_interval_sec = (int) $robotsDelay;
					                $contentProvider->save();
					            }

					            // parse_url() returns null when the URL has no path component
					            // (e.g. "https://example.com"); such a URL addresses the root path.
					            $path = parse_url($url, PHP_URL_PATH);
					            if (!is_string($path) || $path === '') {
					                $path = '/';
					            }
					            if ($robotsParser->isDisallowed($path)) {
					                throw new DisallowedByProviderException($url);
					            }

					            DB::commit();
					        } catch (DeniedHostException | DisallowedByProviderException $e) {
					            // Commit anyway so that the ContentProvider changes made above are kept.
					            DB::commit();
					            throw $e;
					        } catch (\Throwable $e) {
					            // Catch \Throwable rather than \Exception so that an \Error (e.g. a
					            // TypeError) cannot leave the transaction open.
					            DB::rollBack();
					            throw $e;
					        }
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * メタデータをリモートサーバに問い合わせて取得します。
 | 
				
			||||||
 | 
					     * @param string $url メタデータを取得したいURL
 | 
				
			||||||
 | 
					     * @param Metadata|null $metadata キャッシュ済のメタデータ (存在する場合)
 | 
				
			||||||
 | 
					     * @return Metadata 取得できたメタデータ
 | 
				
			||||||
 | 
					     * @throws UncaughtResolverException Resolver内で例外が発生して取得できなかった場合にスロー
 | 
				
			||||||
 | 
					     * @throws ResolverCircuitBreakException 規定回数以上の解決失敗により、メタデータの取得が不能となっている場合にスロー
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    private function resolve(string $url, ?Metadata $metadata): Metadata
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        DB::beginTransaction();
 | 
				
			||||||
 | 
					        try {
 | 
				
			||||||
            if ($metadata === null) {
 | 
					            if ($metadata === null) {
 | 
				
			||||||
                $metadata = new Metadata(['url' => $url]);
 | 
					                $metadata = new Metadata(['url' => $url]);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@@ -84,7 +285,6 @@ class MetadataResolveService
 | 
				
			|||||||
                $tagIds[] = $tag->id;
 | 
					                $tagIds[] = $tag->id;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            $metadata->tags()->sync($tagIds);
 | 
					            $metadata->tags()->sync($tagIds);
 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            DB::commit();
 | 
					            DB::commit();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -33,7 +33,8 @@
 | 
				
			|||||||
        "sentry/sentry-laravel": "1.8.0",
 | 
					        "sentry/sentry-laravel": "1.8.0",
 | 
				
			||||||
        "staudenmeir/eloquent-eager-limit": "^1.0",
 | 
					        "staudenmeir/eloquent-eager-limit": "^1.0",
 | 
				
			||||||
        "symfony/css-selector": "^4.3",
 | 
					        "symfony/css-selector": "^4.3",
 | 
				
			||||||
        "symfony/dom-crawler": "^4.3"
 | 
					        "symfony/dom-crawler": "^4.3",
 | 
				
			||||||
 | 
					        "t1gor/robots-txt-parser": "^0.2.4"
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    "require-dev": {
 | 
					    "require-dev": {
 | 
				
			||||||
        "barryvdh/laravel-debugbar": "^3.1",
 | 
					        "barryvdh/laravel-debugbar": "^3.1",
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										145
									
								
								composer.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										145
									
								
								composer.lock
									
									
									
										generated
									
									
									
								
							@@ -4,7 +4,7 @@
 | 
				
			|||||||
        "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
 | 
					        "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
 | 
				
			||||||
        "This file is @generated automatically"
 | 
					        "This file is @generated automatically"
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
    "content-hash": "1bba68b609be6a0dcdaf05d72e8eb759",
 | 
					    "content-hash": "bbb184ff943ae3a938a8370d94b6afb2",
 | 
				
			||||||
    "packages": [
 | 
					    "packages": [
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "name": "anhskohbo/no-captcha",
 | 
					            "name": "anhskohbo/no-captcha",
 | 
				
			||||||
@@ -547,20 +547,6 @@
 | 
				
			|||||||
                "uppercase",
 | 
					                "uppercase",
 | 
				
			||||||
                "words"
 | 
					                "words"
 | 
				
			||||||
            ],
 | 
					            ],
 | 
				
			||||||
            "funding": [
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://www.doctrine-project.org/sponsorship.html",
 | 
					 | 
				
			||||||
                    "type": "custom"
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://www.patreon.com/phpdoctrine",
 | 
					 | 
				
			||||||
                    "type": "patreon"
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://tidelift.com/funding/github/packagist/doctrine%2Finflector",
 | 
					 | 
				
			||||||
                    "type": "tidelift"
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            ],
 | 
					 | 
				
			||||||
            "time": "2020-05-29T15:13:26+00:00"
 | 
					            "time": "2020-05-29T15:13:26+00:00"
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
@@ -623,20 +609,6 @@
 | 
				
			|||||||
                "parser",
 | 
					                "parser",
 | 
				
			||||||
                "php"
 | 
					                "php"
 | 
				
			||||||
            ],
 | 
					            ],
 | 
				
			||||||
            "funding": [
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://www.doctrine-project.org/sponsorship.html",
 | 
					 | 
				
			||||||
                    "type": "custom"
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://www.patreon.com/phpdoctrine",
 | 
					 | 
				
			||||||
                    "type": "patreon"
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    "url": "https://tidelift.com/funding/github/packagist/doctrine%2Flexer",
 | 
					 | 
				
			||||||
                    "type": "tidelift"
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            ],
 | 
					 | 
				
			||||||
            "time": "2020-05-25T17:44:05+00:00"
 | 
					            "time": "2020-05-25T17:44:05+00:00"
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
@@ -5649,6 +5621,63 @@
 | 
				
			|||||||
            ],
 | 
					            ],
 | 
				
			||||||
            "time": "2020-05-30T20:06:45+00:00"
 | 
					            "time": "2020-05-30T20:06:45+00:00"
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "name": "t1gor/robots-txt-parser",
 | 
				
			||||||
 | 
					            "version": "v0.2.4",
 | 
				
			||||||
 | 
					            "source": {
 | 
				
			||||||
 | 
					                "type": "git",
 | 
				
			||||||
 | 
					                "url": "https://github.com/t1gor/Robots.txt-Parser-Class.git",
 | 
				
			||||||
 | 
					                "reference": "7ff08da5625fb4f72d17b1528c60aadb184e9e68"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "dist": {
 | 
				
			||||||
 | 
					                "type": "zip",
 | 
				
			||||||
 | 
					                "url": "https://api.github.com/repos/t1gor/Robots.txt-Parser-Class/zipball/7ff08da5625fb4f72d17b1528c60aadb184e9e68",
 | 
				
			||||||
 | 
					                "reference": "7ff08da5625fb4f72d17b1528c60aadb184e9e68",
 | 
				
			||||||
 | 
					                "shasum": ""
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "require": {
 | 
				
			||||||
 | 
					                "ext-mbstring": "*",
 | 
				
			||||||
 | 
					                "php": ">=5.5.0",
 | 
				
			||||||
 | 
					                "vipnytt/useragentparser": "^1.0"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "require-dev": {
 | 
				
			||||||
 | 
					                "codeclimate/php-test-reporter": ">=0.2",
 | 
				
			||||||
 | 
					                "phpunit/phpunit": "~3.7"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "type": "library",
 | 
				
			||||||
 | 
					            "autoload": {
 | 
				
			||||||
 | 
					                "classmap": [
 | 
				
			||||||
 | 
					                    "source/robotstxtparser.php"
 | 
				
			||||||
 | 
					                ]
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "notification-url": "https://packagist.org/downloads/",
 | 
				
			||||||
 | 
					            "license": [
 | 
				
			||||||
 | 
					                "MIT"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "authors": [
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    "name": "Igor Timoshenkov",
 | 
				
			||||||
 | 
					                    "email": "igor.timoshenkov@gmail.com",
 | 
				
			||||||
 | 
					                    "role": "creator"
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    "name": "Jan-Petter Gundersen",
 | 
				
			||||||
 | 
					                    "email": "jpg@vipnytt.no",
 | 
				
			||||||
 | 
					                    "role": "contributor"
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "description": "PHP class to parse robots.txt rules according to Google, Yandex, W3C and The Web Robots Pages specifications.",
 | 
				
			||||||
 | 
					            "homepage": "https://github.com/t1gor/Robots.txt-Parser-Class",
 | 
				
			||||||
 | 
					            "keywords": [
 | 
				
			||||||
 | 
					                "The Web Robots Pages",
 | 
				
			||||||
 | 
					                "W3C",
 | 
				
			||||||
 | 
					                "google",
 | 
				
			||||||
 | 
					                "parser",
 | 
				
			||||||
 | 
					                "robots.txt",
 | 
				
			||||||
 | 
					                "yandex"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "time": "2018-07-21T20:01:19+00:00"
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "name": "tijsverkoyen/css-to-inline-styles",
 | 
					            "name": "tijsverkoyen/css-to-inline-styles",
 | 
				
			||||||
            "version": "2.2.3",
 | 
					            "version": "2.2.3",
 | 
				
			||||||
@@ -5698,6 +5727,64 @@
 | 
				
			|||||||
            "homepage": "https://github.com/tijsverkoyen/CssToInlineStyles",
 | 
					            "homepage": "https://github.com/tijsverkoyen/CssToInlineStyles",
 | 
				
			||||||
            "time": "2020-07-13T06:12:54+00:00"
 | 
					            "time": "2020-07-13T06:12:54+00:00"
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "name": "vipnytt/useragentparser",
 | 
				
			||||||
 | 
					            "version": "v1.0.4",
 | 
				
			||||||
 | 
					            "source": {
 | 
				
			||||||
 | 
					                "type": "git",
 | 
				
			||||||
 | 
					                "url": "https://github.com/VIPnytt/UserAgentParser.git",
 | 
				
			||||||
 | 
					                "reference": "c5a6718a57088e0d45c2e36f09efabc4e008bd8c"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "dist": {
 | 
				
			||||||
 | 
					                "type": "zip",
 | 
				
			||||||
 | 
					                "url": "https://api.github.com/repos/VIPnytt/UserAgentParser/zipball/c5a6718a57088e0d45c2e36f09efabc4e008bd8c",
 | 
				
			||||||
 | 
					                "reference": "c5a6718a57088e0d45c2e36f09efabc4e008bd8c",
 | 
				
			||||||
 | 
					                "shasum": ""
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "require": {
 | 
				
			||||||
 | 
					                "php": "^5.5 || ^7.0"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "require-dev": {
 | 
				
			||||||
 | 
					                "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "type": "library",
 | 
				
			||||||
 | 
					            "autoload": {
 | 
				
			||||||
 | 
					                "psr-4": {
 | 
				
			||||||
 | 
					                    "vipnytt\\": "src/"
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "notification-url": "https://packagist.org/downloads/",
 | 
				
			||||||
 | 
					            "license": [
 | 
				
			||||||
 | 
					                "MIT"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "authors": [
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    "name": "VIP nytt AS",
 | 
				
			||||||
 | 
					                    "email": "support@vipnytt.no",
 | 
				
			||||||
 | 
					                    "role": "Owner"
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    "name": "Jan-Petter Gundersen",
 | 
				
			||||||
 | 
					                    "email": "jpg@vipnytt.no",
 | 
				
			||||||
 | 
					                    "role": "Developer"
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "description": "User-Agent parser for robot rule sets",
 | 
				
			||||||
 | 
					            "homepage": "https://github.com/VIPnytt/UserAgentParser",
 | 
				
			||||||
 | 
					            "keywords": [
 | 
				
			||||||
 | 
					                "REP",
 | 
				
			||||||
 | 
					                "Robots Exclusion Protocol",
 | 
				
			||||||
 | 
					                "Robots meta tag",
 | 
				
			||||||
 | 
					                "crawler",
 | 
				
			||||||
 | 
					                "robot",
 | 
				
			||||||
 | 
					                "robots.txt",
 | 
				
			||||||
 | 
					                "spider",
 | 
				
			||||||
 | 
					                "user-agent",
 | 
				
			||||||
 | 
					                "useragent",
 | 
				
			||||||
 | 
					                "x-robots-tag"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "time": "2017-12-17T14:23:27+00:00"
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "name": "vlucas/phpdotenv",
 | 
					            "name": "vlucas/phpdotenv",
 | 
				
			||||||
            "version": "v3.6.7",
 | 
					            "version": "v3.6.7",
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										14
									
								
								database/factories/ContentProviderFactory.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								database/factories/ContentProviderFactory.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
				
			|||||||
 | 
					<?php
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** @var \Illuminate\Database\Eloquent\Factory $factory */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use App\ContentProvider;
 | 
				
			||||||
 | 
					use Faker\Generator as Faker;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Default attributes for a ContentProvider: a fixed example host with no
					// robots.txt body and the cache timestamp set to the current time.
					$factory->define(ContentProvider::class, function (Faker $faker) {
					    $attributes = [
					        'host' => 'example.com',
					        'robots' => null,
					        'robots_cached_at' => now(),
					    ];

					    return $attributes;
					});
 | 
				
			||||||
@@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					<?php
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use Illuminate\Database\Migrations\Migration;
 | 
				
			||||||
 | 
					use Illuminate\Database\Schema\Blueprint;
 | 
				
			||||||
 | 
					use Illuminate\Support\Facades\Schema;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Migration that creates (and on rollback drops) the `content_providers`
					 * table, which holds one row per remote host keyed by the host name.
					 */
					class CreateContentProvidersTable extends Migration
					{
					    /** Name of the table managed by this migration. */
					    private const TABLE = 'content_providers';

					    /**
					     * Create the table with its per-host columns.
					     *
					     * @return void
					     */
					    public function up()
					    {
					        Schema::create(self::TABLE, function (Blueprint $blueprint) {
					            // Host name; declared as the primary key at the end of this closure.
					            $blueprint->string('host');
					            // Nullable text column for robots data.
					            $blueprint->text('robots')->nullable();
					            // Required timestamp recording when the robots data was cached.
					            $blueprint->timestamp('robots_cached_at');
					            // Defaults to false.
					            // NOTE(review): presumably marks hosts that must not be accessed; confirm against the resolver.
					            $blueprint->boolean('is_blocked')->default(false);
					            // Defaults to 5.
					            // NOTE(review): presumably the minimum number of seconds between accesses to this host; confirm.
					            $blueprint->integer('access_interval_sec')->default(5);
					            // Standard created_at / updated_at columns.
					            $blueprint->timestamps();

					            $blueprint->primary('host');
					        });
					    }

					    /**
					     * Drop the table if it exists, undoing up().
					     *
					     * @return void
					     */
					    public function down()
					    {
					        Schema::dropIfExists(self::TABLE);
					    }
					}
 | 
				
			||||||
							
								
								
									
										2
									
								
								storage/content_providers_lock/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								storage/content_providers_lock/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
				
			|||||||
 | 
					*
 | 
				
			||||||
 | 
					!.gitignore
 | 
				
			||||||
@@ -2,6 +2,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Tests\Unit\Services;
 | 
					namespace Tests\Unit\Services;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use App\ContentProvider;
 | 
				
			||||||
use App\MetadataResolver\MetadataResolver;
 | 
					use App\MetadataResolver\MetadataResolver;
 | 
				
			||||||
use App\MetadataResolver\ResolverCircuitBreakException;
 | 
					use App\MetadataResolver\ResolverCircuitBreakException;
 | 
				
			||||||
use App\MetadataResolver\UncaughtResolverException;
 | 
					use App\MetadataResolver\UncaughtResolverException;
 | 
				
			||||||
@@ -26,6 +27,8 @@ class MetadataResolverServiceTest extends TestCase
 | 
				
			|||||||
        parent::setUp();
 | 
					        parent::setUp();
 | 
				
			||||||
        $this->seed();
 | 
					        $this->seed();
 | 
				
			||||||
        Carbon::setTestNow('2020-07-21 19:19:19');
 | 
					        Carbon::setTestNow('2020-07-21 19:19:19');
 | 
				
			||||||
 | 
					        // FIXME: 今書かれてるテストはresolveのHTTPリクエストのみを考慮しているので、ContentProviderにデータがないとリクエスト回数がずれる
 | 
				
			||||||
 | 
					        factory(ContentProvider::class)->create();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    protected function tearDown(): void
 | 
					    protected function tearDown(): void
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user