kbin-core/src/Utils/UrlCleaner.php

132 lines
3.2 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Utils;
use App\Exception\BadUrlException;
/**
 * Strips known tracking parameters from a URL and validates the result.
 *
 * The instance is invokable: `$clean = (new UrlCleaner())($url);`.
 * The query string is parsed with parse_str() and rebuilt with
 * http_build_query(), so the surviving parameters come back in PHP's
 * normalised encoding even when nothing was removed.
 */
class UrlCleaner
{
    /**
     * Names of query variables that are considered tracking noise.
     *
     * Source list: https://gist.github.com/htsign/455bd76d107be1f810c5caa4072c8275
     *
     * NOTE(review): entries containing `@` (e.g. `ei@google.*`) or starting
     * with `#?` are carried over verbatim from the upstream list. This class
     * compares every entry literally against the parsed query key; it does
     * not interpret host scoping or wildcards, so those entries only match a
     * parameter that is spelled exactly like the entry.
     */
    public const TRACKING_TAGS = [
        'utm_source',
        'utm_medium',
        'utm_term',
        'utm_content',
        'utm_campaign',
        'utm_reader',
        'utm_place',
        'utm_userid',
        'utm_cid',
        'utm_name',
        'utm_pubreferrer',
        'utm_swu',
        'utm_viz_id',
        'utm_int',
        'ga_source',
        'ga_medium',
        'ga_term',
        'ga_content',
        'ga_campaign',
        'ga_place',
        // These two were previously fused into the single literal
        // 'yclid, _openstat', which could never match a real parameter.
        'yclid',
        '_openstat',
        'fb_action_ids',
        'fb_action_types',
        'fb_ref',
        'fb_source',
        'action_object_map',
        'action_type_map',
        'action_ref_map',
        'gs_l',
        'pd_rd_*@amazon.*',
        '_encoding@amazon.*',
        'psc@amazon.*',
        'ei@google.*',
        'bi?@google.*',
        'client@google.*',
        'dpr@google.*',
        'gws_rd@google.*',
        'oq@google.*',
        'sa@google.*',
        'sei@google.*',
        'source@google.*',
        'tbm@google.*',
        'ved@google.*',
        'cvid@bing.com',
        'form@bing.com',
        'sk@bing.com',
        'sp@bing.com',
        'sc@bing.com',
        'qs@bing.com',
        'pq@bing.com',
        'feature@youtube.com',
        'gclid@youtube.com',
        'kw@youtube.com',
        'gws_rd',
        'hmb_campaign',
        'hmb_medium',
        'hmb_source',
        '_hsmi',
        'ref_src',
        'ref_url',
        'source@sourceforge.net',
        'position@sourceforge.net',
        'callback@bilibili.com',
        'ref@www.asahi.com',
        'iref@www.asahi.com',
        'rm@digital.asahi.com',
        'word_result@nhk.or.jp',
        'algorithm@www.change.org',
        'grid_position@www.change.org',
        'j@www.change.org',
        'jb@www.change.org',
        'mid@www.change.org',
        'l@www.change.org',
        'original_footer_petition_id@www.change.org',
        'placement@www.change.org',
        'pt@www.change.org',
        'sfmc_sub@www.change.org',
        'source_location@www.change.org',
        'u@www.change.org',
        'n_cid@nikkeibp.co.jp',
        'fbclid@itmedia.co.jp',
        'ref@*.nicovideo.jp',
        '#?utm_medium',
        '#?utm_source',
        '#?utm_campaign',
        '#?utm_content',
        '#?utm_int',
        'fbclid',
    ];

    /**
     * Returns $url with every variable listed in TRACKING_TAGS removed.
     *
     * The fragment is separated before the query string is parsed so that it
     * is neither swallowed into the last query value nor dropped. A query
     * embedded in the fragment itself (`#/route?utm_source=x`) is cleaned
     * with the same rules.
     *
     * @throws BadUrlException when the cleaned URL fails FILTER_VALIDATE_URL
     */
    public function __invoke(string $url): string
    {
        // Limit of 2: only the first '#' starts the fragment.
        [$beforeFragment, $fragment] = array_pad(explode('#', $url, 2), 2, null);

        $cleaned = $this->stripTrackingVars($beforeFragment);

        if (null !== $fragment) {
            // Keep the '#' even when the fragment ends up empty.
            $cleaned .= '#'.$this->stripTrackingVars($fragment);
        }

        return $this->validate($cleaned);
    }

    /**
     * Removes tracking variables from a "prefix?query" string.
     *
     * Only the first '?' is treated as the separator; any later '?' stays
     * inside the query and is handled by parse_str(). When no variables
     * remain, the prefix is returned without a trailing '?'.
     */
    private function stripTrackingVars(string $part): string
    {
        [$prefix, $query] = array_pad(explode('?', $part, 2), 2, '');

        parse_str($query, $vars);

        foreach (self::TRACKING_TAGS as $tag) {
            unset($vars[$tag]);
        }

        $rebuilt = http_build_query($vars);

        return '' === $rebuilt ? $prefix : $prefix.'?'.$rebuilt;
    }

    /**
     * Returns $url unchanged when it is a syntactically valid URL.
     *
     * @throws BadUrlException when filter_var() rejects the URL
     */
    private function validate(string $url): string
    {
        // @todo checkdnsrr?
        if (false === filter_var($url, FILTER_VALIDATE_URL)) {
            throw new BadUrlException($url);
        }

        return $url;
    }
}