diff --git a/db/allTlds.db b/db/allTlds.db new file mode 100644 index 0000000..510ac62 Binary files /dev/null and b/db/allTlds.db differ diff --git a/functions/ExtractDomain.php b/functions/ExtractDomain.php new file mode 100644 index 0000000..69c1d7d --- /dev/null +++ b/functions/ExtractDomain.php @@ -0,0 +1,77 @@ +open('./db/allTlds.db'); // 顶级域名数据库 + } +} + +class extractDomain { + private function getAllTlds() { // 获取所有顶级域 含次级域 + $db = new tldDB; + $res = $db->query('SELECT tld FROM `tlds`;'); + while ($row = $res->fetchArray(SQLITE3_ASSOC)) { + $tlds[] = $row['tld']; + } + return $tlds; // Unicode字符使用Punycode编码 + } + + private function isDomain($domain) { // 检测是否为域名 + preg_match('/^(?=^.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+$/', $domain, $match); + return (count($match) != 0); + } + + private function getDomain($url) { // 从URL获取域名 + $url = preg_replace('/^[\w]+:\/\//', '', $url); // 去除协议字段 + $url = explode('?', $url)[0]; // 去除请求参数内容 + $domain = explode('/', $url)[0]; // 分离域名 + return (new Punycode)->encode($domain); + } + + private function getTld($domain) { // 搜索域名TLD + $tlds = $this->getAllTlds(); // 获取TLD列表 + foreach ($tlds as $tld) { + if (substr($domain, -strlen($tld)) === $tld) { // 匹配测试 + $target[] = $tld; + } + } + if (count($target) === 0) { + return ''; // 匹配不到TLD + }; + $type = 0; + foreach ($target as $tld) { // 遍历可能的结果 + $num = substr_count($tld, '.'); + if ($type < $num) { // 获取.个数最多的 + $type = $num; + $result = $tld; + } + } + return $result; // 返回网站顶级域名 + } + + private function getSite($domain, $tld) { // 获取主域名 + $domain = explode('.', $domain); + $num = count($domain) - substr_count($tld, '.'); + return $domain[$num - 1] . $tld; + } + + public function analyse($url) { // 分析域名信息 + $domain = $this->getDomain($url); + if (!$this->isDomain($domain)) { // 域名不合格 + return array(); + } + $tld = $this->getTld($domain); + if ($tld == '') { // 匹配不到TLD + return array( + 'domain' => $domain + ); + } + return array( + 'domain' => $domain, + 'tld' => $tld, + 'site' => $this->getSite($domain, $tld) + ); + } +} + +?> diff --git a/lib/Punycode.php b/functions/Punycode.php similarity index 100% rename from lib/Punycode.php rename to functions/Punycode.php diff --git a/redisCache.php b/functions/RedisCache.php similarity index 100% rename from redisCache.php rename to functions/RedisCache.php diff --git a/tgInterface.php b/functions/TgInterface.php similarity index 100% rename from tgInterface.php rename to functions/TgInterface.php diff --git a/main.php b/main.php index 7604ba0..70f9b45 100644 --- a/main.php +++ b/main.php @@ -1,9 +1,10 @@