From c7aa0026251c884ce6ad5ab92e7889aeec827cb6 Mon Sep 17 00:00:00 2001 From: shibafu Date: Thu, 30 Jul 2020 00:46:33 +0900 Subject: [PATCH 1/7] =?UTF-8?q?=E3=81=AA=E3=82=93=E3=82=82=E3=82=8F?= =?UTF-8?q?=E3=81=8B=E3=82=89=E3=82=93=E3=82=8F=E3=83=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Console/Commands/NormalizeTags.php | 50 ++++++++++++++++++++++++++ app/Utilities/Formatter.php | 11 ++++++ 2 files changed, 61 insertions(+) create mode 100644 app/Console/Commands/NormalizeTags.php diff --git a/app/Console/Commands/NormalizeTags.php b/app/Console/Commands/NormalizeTags.php new file mode 100644 index 0000000..4e9d8ed --- /dev/null +++ b/app/Console/Commands/NormalizeTags.php @@ -0,0 +1,50 @@ +formatter = $formatter; + } + + /** + * Execute the console command. + * + * @return mixed + */ + public function handle() + { + foreach (Tag::query()->orderBy('name')->cursor() as $tag) { + $normalizedName = $this->formatter->normalizeToSearchIndex($tag->name); + $this->line("{$tag->name} : {$normalizedName}"); + } + } +} diff --git a/app/Utilities/Formatter.php b/app/Utilities/Formatter.php index 7ac108b..aa09c78 100644 --- a/app/Utilities/Formatter.php +++ b/app/Utilities/Formatter.php @@ -132,4 +132,15 @@ class Formatter return $bytes . 'B'; } + + public function normalizeToSearchIndex(string $text): string + { + $text = \Normalizer::normalize($text, \Normalizer::FORM_KC); +// $text = \Transliterator::create('Katakana-Hiragana')->transliterate($text); + $text = mb_convert_kana($text, 'c'); + $text = preg_replace('/[^\p{L}\p{N}]/u', '', $text); + $text = mb_strtolower($text); + + return $text; + } } From e2c43fef802dbdc7ab046bf5c5cc6fd61eb5d634 Mon Sep 17 00:00:00 2001 From: shibafu Date: Thu, 30 Jul 2020 22:42:10 +0900 Subject: [PATCH 2/7] tags.normalized_name --- app/Console/Commands/NormalizeTags.php | 19 ++++++++--- app/Tag.php | 10 ++++++ app/Utilities/Formatter.php | 11 +++---- ..._30_221302_add_normalized_name_to_tags.php | 32 +++++++++++++++++++ 4 files changed, 61 insertions(+), 11 deletions(-) create mode 100644 database/migrations/2020_07_30_221302_add_normalized_name_to_tags.php diff --git a/app/Console/Commands/NormalizeTags.php b/app/Console/Commands/NormalizeTags.php index 4e9d8ed..9d6cf30 100644 --- a/app/Console/Commands/NormalizeTags.php +++ b/app/Console/Commands/NormalizeTags.php @@ -4,6 +4,7 @@ namespace App\Console\Commands; use App\Tag; use App\Utilities\Formatter; +use DB; use Illuminate\Console\Command; class NormalizeTags extends Command @@ -42,9 +43,19 @@ class NormalizeTags extends Command */ public function handle() { - foreach (Tag::query()->orderBy('name')->cursor() as $tag) { - $normalizedName = $this->formatter->normalizeToSearchIndex($tag->name); - $this->line("{$tag->name} : {$normalizedName}"); - } + $start = hrtime(true); + + DB::transaction(function () { + /** @var Tag $tag */ + foreach (Tag::query()->cursor() as $tag) { + $normalizedName = $this->formatter->normalizeTagName($tag->name); + $this->line("{$tag->name} : {$normalizedName}"); + $tag->normalized_name = $normalizedName; + $tag->save(); + } + }); + + $elapsed = (hrtime(true) - $start) / 1e+9; + $this->info("Done! ({$elapsed} sec)"); } } diff --git a/app/Tag.php b/app/Tag.php index df057f5..895492e 100644 --- a/app/Tag.php +++ b/app/Tag.php @@ -2,6 +2,7 @@ namespace App; +use App\Utilities\Formatter; use Illuminate\Database\Eloquent\Model; class Tag extends Model @@ -15,6 +16,15 @@ class Tag extends Model 'name' ]; + protected static function boot() + { + parent::boot(); + + self::creating(function (Tag $tag) { + $tag->normalized_name = app(Formatter::class)->normalizeTagName($tag->name); + }); + } + public function ejaculations() { return $this->belongsToMany('App\Ejaculation')->withTimestamps(); diff --git a/app/Utilities/Formatter.php b/app/Utilities/Formatter.php index aa09c78..f15901a 100644 --- a/app/Utilities/Formatter.php +++ b/app/Utilities/Formatter.php @@ -133,14 +133,11 @@ class Formatter return $bytes . 'B'; } - public function normalizeToSearchIndex(string $text): string + public function normalizeTagName(string $name) { - $text = \Normalizer::normalize($text, \Normalizer::FORM_KC); -// $text = \Transliterator::create('Katakana-Hiragana')->transliterate($text); - $text = mb_convert_kana($text, 'c'); - $text = preg_replace('/[^\p{L}\p{N}]/u', '', $text); - $text = mb_strtolower($text); + $name = \Normalizer::normalize($name, \Normalizer::FORM_KC); + $name = mb_strtolower($name); - return $text; + return $name; } } diff --git a/database/migrations/2020_07_30_221302_add_normalized_name_to_tags.php b/database/migrations/2020_07_30_221302_add_normalized_name_to_tags.php new file mode 100644 index 0000000..eead2a0 --- /dev/null +++ b/database/migrations/2020_07_30_221302_add_normalized_name_to_tags.php @@ -0,0 +1,32 @@ +string('normalized_name')->nullable(); + }); + } + + /** + * Reverse the migrations. + * + * @return void + */ + public function down() + { + Schema::table('tags', function (Blueprint $table) { + $table->dropColumn('normalized_name'); + }); + } +} From d18f245129116b84fef34e36964de4de0cfde11d Mon Sep 17 00:00:00 2001 From: shibafu Date: Thu, 30 Jul 2020 23:04:39 +0900 Subject: [PATCH 3/7] Docker: use php-intl --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd99670..d8530ac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,8 +5,8 @@ FROM php:7.3-apache ENV APACHE_DOCUMENT_ROOT /var/www/html/public RUN apt-get update \ - && apt-get install -y git libpq-dev unzip \ - && docker-php-ext-install pdo_pgsql \ + && apt-get install -y git libpq-dev unzip libicu-dev \ + && docker-php-ext-install pdo_pgsql intl \ && pecl install xdebug \ && curl -sS https://getcomposer.org/installer | php \ && mv composer.phar /usr/local/bin/composer \ From 561c9d028da59549356e1a70f54e2d4388fa8f43 Mon Sep 17 00:00:00 2001 From: shibafu Date: Thu, 30 Jul 2020 23:12:29 +0900 Subject: [PATCH 4/7] =?UTF-8?q?=E6=A4=9C=E7=B4=A2=E6=99=82=E3=81=AB?= =?UTF-8?q?=E3=81=AFtags.normalized=5Fname=E3=82=92=E4=BD=BF=E3=81=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Http/Controllers/SearchController.php | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/app/Http/Controllers/SearchController.php b/app/Http/Controllers/SearchController.php index 0487344..ee4b5c0 100644 --- a/app/Http/Controllers/SearchController.php +++ b/app/Http/Controllers/SearchController.php @@ -4,20 +4,30 @@ namespace App\Http\Controllers; use App\Ejaculation; use App\Tag; +use App\Utilities\Formatter; use Illuminate\Http\Request; use Illuminate\Support\Facades\Auth; class SearchController extends Controller { + /** @var Formatter */ + private $formatter; + + public function __construct(Formatter $formatter) + { + $this->formatter = $formatter; + } + public function index(Request $request) { $inputs = $request->validate([ 'q' => 'required' ]); + $q = $this->normalizeQuery($inputs['q']); $results = Ejaculation::query() - ->whereHas('tags', function ($query) use ($inputs) { - $query->where('name', 'like', "%{$inputs['q']}%"); + ->whereHas('tags', function ($query) use ($q) { + $query->where('normalized_name', 'like', "%{$q}%"); }) ->whereHas('user', function ($query) { $query->where('is_protected', false); @@ -41,11 +51,17 @@ class SearchController extends Controller 'q' => 'required' ]); + $q = $this->normalizeQuery($inputs['q']); $results = Tag::query() - ->where('name', 'like', "%{$inputs['q']}%") + ->where('normalized_name', 'like', "%{$q}%") ->paginate(50) ->appends($inputs); return view('search.relatedTag')->with(compact('inputs', 'results')); } + + private function normalizeQuery(string $query): string + { + return $this->formatter->normalizeTagName($query); + } } From c9efcb538cae0b2a68f60c3b4df36efc86c1e210 Mon Sep 17 00:00:00 2001 From: shibafu Date: Fri, 31 Jul 2020 22:21:31 +0900 Subject: [PATCH 5/7] add extension requirements --- composer.json | 6 +++ composer.lock | 104 +++++--------------------------------------------- 2 files changed, 16 insertions(+), 94 deletions(-) diff --git a/composer.json b/composer.json index c262a1c..f5a796c 100644 --- a/composer.json +++ b/composer.json @@ -12,6 +12,12 @@ ], "require": { "php": "^7.2", + "ext-dom": "*", + "ext-intl": "*", + "ext-json": "*", + "ext-libxml": "*", + "ext-mbstring": "*", + "ext-pdo": "*", "anhskohbo/no-captcha": "^3.0", "doctrine/dbal": "^2.9", "erusev/parsedown": "^1.7", diff --git a/composer.lock b/composer.lock index 5a68ddc..a0f42c5 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "2c0bd951a595d4856079c5a13a72e651", + "content-hash": "1bba68b609be6a0dcdaf05d72e8eb759", "packages": [ { "name": "anhskohbo/no-captcha", @@ -394,20 +394,6 @@ "sqlserver", "sqlsrv" ], - "funding": [ - { - "url": "https://www.doctrine-project.org/sponsorship.html", - "type": "custom" - }, - { - "url": "https://www.patreon.com/phpdoctrine", - "type": "patreon" - }, - { - "url": "https://tidelift.com/funding/github/packagist/doctrine%2Fdbal", - "type": "tidelift" - } - ], "time": "2020-04-20T17:19:26+00:00" }, { @@ -1617,12 +1603,6 @@ "transform", "write" ], - "funding": [ - { - "url": "https://github.com/sponsors/nyamsprod", - "type": "github" - } - ], "time": "2020-03-17T15:15:35+00:00" }, { @@ -1707,12 +1687,6 @@ "sftp", "storage" ], - "funding": [ - { - "url": "https://offset.earth/frankdejonge", - "type": "other" - } - ], "time": "2020-05-18T15:13:39+00:00" }, { @@ -1839,16 +1813,6 @@ "logging", "psr-3" ], - "funding": [ - { - "url": "https://github.com/Seldaek", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/monolog/monolog", - "type": "tidelift" - } - ], "time": "2020-05-22T08:12:19+00:00" }, { @@ -5875,12 +5839,6 @@ "profiler", "webprofiler" ], - "funding": [ - { - "url": "https://github.com/barryvdh", - "type": "github" - } - ], "time": "2020-05-05T10:53:32+00:00" }, { @@ -5952,12 +5910,6 @@ "phpstorm", "sublime" ], - "funding": [ - { - "url": "https://github.com/barryvdh", - "type": "github" - } - ], "time": "2020-04-22T09:57:26+00:00" }, { @@ -6063,16 +6015,6 @@ "ssl", "tls" ], - "funding": [ - { - "url": "https://packagist.com", - "type": "custom" - }, - { - "url": "https://tidelift.com/funding/github/packagist/composer/composer", - "type": "tidelift" - } - ], "time": "2020-04-08T08:27:21+00:00" }, { @@ -6154,16 +6096,6 @@ "dependency", "package" ], - "funding": [ - { - "url": "https://packagist.com", - "type": "custom" - }, - { - "url": "https://tidelift.com/funding/github/packagist/composer/composer", - "type": "tidelift" - } - ], "time": "2020-05-06T08:28:10+00:00" }, { @@ -6536,12 +6468,6 @@ "flare", "reporting" ], - "funding": [ - { - "url": "https://www.patreon.com/spatie", - "type": "patreon" - } - ], "time": "2020-03-02T15:52:04+00:00" }, { @@ -8088,12 +8014,6 @@ "highlight.php", "syntax" ], - "funding": [ - { - "url": "https://github.com/allejo", - "type": "github" - } - ], "time": "2020-03-02T05:59:21+00:00" }, { @@ -8758,16 +8678,6 @@ "parser", "validator" ], - "funding": [ - { - "url": "https://github.com/Seldaek", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/seld/jsonlint", - "type": "tidelift" - } - ], "time": "2020-04-30T19:05:18+00:00" }, { @@ -9032,8 +8942,8 @@ "authors": [ { "name": "Arne Blankerts", - "email": "arne@blankerts.de", - "role": "Developer" + "role": "Developer", + "email": "arne@blankerts.de" } ], "description": "A small library for converting tokenized PHP source code into XML and potentially other formats", @@ -9097,7 +9007,13 @@ "prefer-stable": true, "prefer-lowest": false, "platform": { - "php": "^7.2" + "php": "^7.2", + "ext-dom": "*", + "ext-intl": "*", + "ext-json": "*", + "ext-libxml": "*", + "ext-mbstring": "*", + "ext-pdo": "*" }, "platform-dev": [], "plugin-api-version": "1.1.0" From b5901f26bfce09d86d8dd89b288cb655fd67f768 Mon Sep 17 00:00:00 2001 From: shibafu Date: Fri, 31 Jul 2020 23:10:54 +0900 Subject: [PATCH 6/7] add test --- tests/Unit/Utilities/FormatterTest.php | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/Unit/Utilities/FormatterTest.php b/tests/Unit/Utilities/FormatterTest.php index 843e60f..1708e4b 100644 --- a/tests/Unit/Utilities/FormatterTest.php +++ b/tests/Unit/Utilities/FormatterTest.php @@ -70,4 +70,30 @@ class FormatterTest extends TestCase $formatter->profileImageSrcSet($profileImageProvider, 128, 2) ); } + + /** + * @dataProvider provideNormalizeTagName + */ + public function testNormalizeTagName($input, $expected) + { + $formatter = new Formatter(); + + $normalized = $formatter->normalizeTagName($input); + $this->assertSame($expected, $normalized); + $this->assertSame($expected, $formatter->normalizeTagName($normalized)); + } + + public function provideNormalizeTagName() + { + return [ + 'LowerCase' => ['example', 'example'], + 'UpperCase' => ['EXAMPLE', 'example'], + 'HalfWidthKana' => ['ティッシュ', 'ティッシュ'], + 'FullWidthAlphabet' => ['Tissue', 'tissue'], + '組み文字1' => ['13㎝', '13cm'], + '組み文字2' => ['13㌢㍍', '13センチメートル'], + 'Script' => ['ℬ𝒶𝒷𝓊𝓂𝒾', 'babumi'], + '分割された濁点' => ['オカス゛', 'オカズ'], + ]; + } } From 18ae64a8704c826a58401fbe34e281e5ccb55cc6 Mon Sep 17 00:00:00 2001 From: shibafu Date: Fri, 31 Jul 2020 23:19:35 +0900 Subject: [PATCH 7/7] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=81=97?= =?UTF-8?q?=E3=81=9F=E3=81=84=E3=81=AE=E3=81=AFNFD=E3=81=A0=E3=81=A3?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/Unit/Utilities/FormatterTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Unit/Utilities/FormatterTest.php b/tests/Unit/Utilities/FormatterTest.php index 1708e4b..46718fa 100644 --- a/tests/Unit/Utilities/FormatterTest.php +++ b/tests/Unit/Utilities/FormatterTest.php @@ -93,7 +93,7 @@ class FormatterTest extends TestCase '組み文字1' => ['13㎝', '13cm'], '組み文字2' => ['13㌢㍍', '13センチメートル'], 'Script' => ['ℬ𝒶𝒷𝓊𝓂𝒾', 'babumi'], - '分割された濁点' => ['オカス゛', 'オカズ'], + 'NFD' => ['オカズ', 'オカズ'], ]; } }