Browse Source

feat: extract domain info from url

master
Dnomd343 3 years ago
parent
commit
5989197329
  1. BIN
      db/allTlds.db
  2. 77
      functions/ExtractDomain.php
  3. 0
      functions/Punycode.php
  4. 0
      functions/RedisCache.php
  5. 0
      functions/TgInterface.php
  6. 7
      main.php

BIN
db/allTlds.db

Binary file not shown.

77
functions/ExtractDomain.php

@ -0,0 +1,77 @@
<?php
/**
 * SQLite3 connection bound to the bundled top-level-domain database.
 */
class tldDB extends SQLite3 {
    /**
     * Opens the TLD database file.
     *
     * The path is relative; it is not anchored to this file's directory.
     */
    public function __construct() {
        $databasePath = './db/allTlds.db';
        $this->open($databasePath);
    }
}
/**
 * Extracts the host name, its top-level domain (TLD, possibly multi-label
 * such as ".co.uk") and the registrable site name from a URL.
 */
class extractDomain {
    /**
     * Loads every TLD row from the `tlds` table.
     *
     * @return array List of TLD strings; empty when the table has no rows or
     *               the query fails. Unicode TLDs are expected in Punycode form.
     */
    private function getAllTlds() {
        $tlds = array(); // always return an array, even with zero rows
        $db = new tldDB;
        $res = $db->query('SELECT tld FROM `tlds`;');
        if ($res === false) { // query failed: behave like an empty list
            return $tlds;
        }
        while ($row = $res->fetchArray(SQLITE3_ASSOC)) {
            $tlds[] = $row['tld'];
        }
        return $tlds;
    }

    /**
     * Checks whether a string is a syntactically valid host name.
     *
     * Requires 3-255 characters overall and at least two dot-separated
     * labels of 1-63 characters from [a-zA-Z0-9-], none starting with "-".
     *
     * @param string $domain Candidate host name (ASCII / Punycode).
     * @return bool True when the string matches the pattern.
     */
    private function isDomain($domain) {
        // The D modifier stops "$" from matching before a trailing newline.
        return preg_match(
            '/^(?=^.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+$/D',
            $domain
        ) === 1;
    }

    /**
     * Isolates the host part of a URL and converts it to Punycode.
     *
     * @param string $url URL or bare host name.
     * @return string Lower-cased, Punycode-encoded host name (not yet validated).
     */
    private function getDomain($url) {
        $url = trim((string)$url); // drop surrounding whitespace and newlines
        $url = preg_replace('/^[\w]+:\/\//', '', $url); // strip the scheme
        $host = preg_split('/[\/?#]/', $url, 2)[0]; // cut at path, query or fragment
        $host = preg_replace('/:\d*$/D', '', $host); // strip an explicit port
        // Host names are case-insensitive; lower-case the ASCII result so the
        // byte-wise suffix comparison in getTld() can match.
        return strtolower((new Punycode)->encode($host));
    }

    /**
     * Finds the TLD entry that the domain ends with.
     *
     * When several entries match, the one containing the most dots wins.
     * Entries without any dot are never selected.
     *
     * @param string $domain Validated host name.
     * @return string Matching TLD, or '' when nothing matches.
     */
    private function getTld($domain) {
        $result = ''; // stays '' when no entry matches
        $type = 0;    // dot count of the best match so far
        foreach ($this->getAllTlds() as $tld) {
            if (substr($domain, -strlen($tld)) !== $tld) { // not a suffix
                continue;
            }
            $num = substr_count($tld, '.');
            if ($type < $num) { // keep the entry with the most dots
                $type = $num;
                $result = $tld;
            }
        }
        return $result;
    }

    /**
     * Builds the site name: the label directly left of the TLD plus the TLD.
     *
     * The label and $tld are concatenated without a separator, so this relies
     * on $tld carrying its own leading dot.
     *
     * @param string $domain Validated host name.
     * @param string $tld    TLD returned by getTld().
     * @return string Site name, e.g. "example.co.uk".
     */
    private function getSite($domain, $tld) {
        $labels = explode('.', $domain);
        $num = count($labels) - substr_count($tld, '.');
        return $labels[$num - 1] . $tld;
    }

    /**
     * Analyses a URL and reports its domain information.
     *
     * @param string $url URL or bare host name.
     * @return array Empty array when the host is not a valid domain;
     *               array('domain' => ...) when no TLD matches; otherwise
     *               array('domain' => ..., 'tld' => ..., 'site' => ...).
     */
    public function analyse($url) {
        $domain = $this->getDomain($url);
        if (!$this->isDomain($domain)) { // not a valid domain
            return array();
        }
        $tld = $this->getTld($domain);
        if ($tld === '') { // no TLD matched
            return array(
                'domain' => $domain
            );
        }
        return array(
            'domain' => $domain,
            'tld' => $tld,
            'site' => $this->getSite($domain, $tld)
        );
    }
}
?>

0
lib/Punycode.php → functions/Punycode.php

0
redisCache.php → functions/RedisCache.php

0
tgInterface.php → functions/TgInterface.php

7
main.php

@ -1,9 +1,10 @@
<?php
require_once 'cmdRoute.php';
require_once 'redisCache.php';
require_once 'tgInterface.php';
require_once 'lib/Punycode.php';
require_once 'functions/Punycode.php';
require_once 'functions/RedisCache.php';
require_once 'functions/TgInterface.php';
require_once 'functions/ExtractDomain.php';
// Load the runtime configuration; loadEnv() is defined outside this view.
$env = loadEnv();
// Bot token taken from the BOT_TOKEN entry of the loaded configuration.
$apiToken = $env['BOT_TOKEN'];

Loading…
Cancel
Save