mirror of https://github.com/dnomd343/IANA-TLD
Dnomd343
3 years ago
commit
a5a9560c22
4 changed files with 1021 additions and 0 deletions
@ -0,0 +1,589 @@ |
|||
<?php |
|||
|
|||
/**
 * Check whether a string looks like a multi-label domain name.
 *
 * The whole string must be 3-255 characters long and consist of at least two
 * dot-separated labels. Each label is 1-63 characters, starts with a letter
 * or digit and otherwise contains only letters, digits and hyphens.
 *
 * @param string $domain Candidate domain name.
 * @return bool True when the whole string matches, false otherwise.
 */
function isDomain($domain) {
    // The D modifier makes "$" match only at the very end of the subject;
    // without it a value with a trailing newline would be accepted.
    return preg_match(
        '/^(?=^.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+$/D',
        $domain
    ) === 1;
}
|||
|
|||
/**
 * Check whether a string matches one of the accepted phone/fax notations.
 *
 * The function is a whitelist: the value is accepted as soon as one of the
 * patterns below matches it, and rejected when none does.
 *
 * @param string $str Phone or fax value to validate.
 * @return bool True when at least one pattern matches, false otherwise.
 */
function isVoice($str) {
    // NOTE(review): several patterns contain an unescaped "." which matches
    // any character, not just a literal dot. They are kept as-is because
    // tightening them could reject values that are accepted today.
    $patterns = array(
        '/^\+[0-9]{1,3} [0-9 -]+$/',
        '/^\+ [0-9]{1,3} [0-9 -]+$/',
        '/^\+[0-9]{1,3}-[0-9 -]+$/',
        '/^[0-9]{1,3}.[0-9.]+$/',
        '/^[0-9]{1,3}-[0-9-]+$/',
        '/^\([0-9]{1,3}\) [0-9 -]+$/',
        '/^[0-9]{1,3} [0-9 -]+$/',
        '/^[0-9]{1}-[0-9-]+$/',
        '/^[0-9.]+ x1$/',
        '/^\+[0-9]{1,3}[0-9 -]+$/',
        '/^\+[0-9]{1,3}[0-9.]+$/',
        '/^\+\+[0-9]+$/',
        '/^[0-9 ]+$/',
        '/^[0-9.-]+$/',
        '/^[0-9-]+ [0-9]+$/',
        '/^\(\+\) [0-9-]+$/',
        '/^[0-9.]+ [0-9 ]+$/',
        '/^\+[0-9]+.[0-9]+x[0-9]+$/',
        '/^\+[0-9]+\([0-9]\)[0-9]+$/',
        '/^[0-9]+ \([0-9]{3}\) [0-9 ]+$/',
        '/^\+[0-9]{2} \([0-9]{3}\) [0-9 ]+$/',
        '/^\+ \([0-9]{3}\) [0-9]+$/',
        '/^\+\([0-9]{3}\) [0-9 ]+$/',
        '/^\(\+[0-9]{3}\) [0-9]+$/',
        '/^\+[0-9.]+-[0-9]{2}$/',
        '/^\+[0-9\/]+ [0-9 ]+$/',
        '/^\+[0-9 ]+–[0-9]$/',
        '/^[0-9 ]+ x114$/',
        '/^\+ [0-9-]+$/',
        '/^\+[0-9].[0-9-]+$/',
        '/^[0-9 -]+,[0-9 -]+$/',
        '/^\(\+[0-9]\) [0-9-]+$/',
        '/^\+[0-9] [0-9]+ x204$/',
        '/^\+[0-9]{1,3}.[0-9 ]+$/',
        '/^\+[0-9]{1,3} [0-9.]+$/',
        '/^\+[0-9].[0-9]+x[0-9]+$/',
        '/^\+[0-9] [0-9.]+ [0-9.]+$/',
        '/^\+[0-9]+\([0-9]\)[0-9 ]+$/',
        '/^\+[0-9]+ \([0-9]\)[0-9 ]+$/',
        '/^\+[0-9]\([0-9]{3}\)[0-9]+$/',
        '/^\+[0-9]{3} [0-9]{3} [0-9\/]+$/',
        '/^\+[0-9] \([0-9]{3}\) [0-9 ]+$/',
        '/^\+[0-9]{1,3}\([0-9]\) [0-9 ]+$/',
        '/^\+[0-9] \([0-9]{4}\) [0-9- ]+$/',
        '/^\+[0-9 ]+\/[0-9]+ or \+[0-9 ]+$/',
        '/^\+[0-9]{2} [0-9]{4} \/ [0-9]{6}$/',
        '/^\+[0-9]{1,3} \([0-9]\) [0-9 -]+$/',
        '/^\+[0-9 ]+, \+[0-9 ]+, \+[0-9]+,$/',
        '/^\+[0-9]{2} \([0-9]{2}\) [0-9 ]+$/',
        '/^\+[0-9]{3} \([0-9]\) [0-9 ]+ or \+[0-9]{3} \([0-9]\) [0-9 ]+$/',
        '/^[0-9]+ [0-9-]+, [0-9]+ [0-9-]+, [0-9]+ [0-9-]+$/',
        '/^\+[0-9] \([0-9]{3}\) [0-9-]+, ext [0-9]{4}$/',
        '/^\+[0-9]{5} [0-9 ]+ \/ \+[0-9]{5} [0-9 ]+$/',
        '/^\+[0-9]{1,3} \([0-9]{1,3}\) [0-9-]+$/',
        '/^\+[0-9]{1,3} \([0-9]{1}\) [0-9 ]+$/',
        '/^\+[0-9]{2}.[0-9]+ ext\: [0-9]{4}$/',
        '/^\+[0-9-]+, \+[0-9-]+, \+[0-9-]+$/',
        '/^[0-9]{1,3} [0-9-]+ EXT. [0-9]+$/',
        '/^\+[0-9] [0-9 -]+ Ext. [0-9]+$/',
        '/^\+[0-9] [0-9 -]+ ext [0-9]+$/',
        '/^\+[0-9]{2}\([0-9]\)[0-9-]+$/',
        '/^\+[0-9] [0-9 ]+ xt. [0-9]+$/',
        '/^\+[0-9 ]+ ext [0-9]{3}$/',
        '/^\+[0-9 ]+ ext. [0-9]+$/',
        // This pattern used to lack the leading "^", which let any prefix
        // through (e.g. "abc +1 23 Ext. 4"). Anchored like all the others.
        '/^\+[0-9 ]+ Ext. [0-9]+$/',
        '/^[0-9 ]+ ext. [0-9]+$/',
        '/^\+[0-9]-[0-9-]+ x1$/',
        '/^\+[0-9-]+ x 102$/',
        '/^[0-9 ]+ Ext. 1$/',
        '/^\+[0-9 ]+[0-9\/]+$/',
        '/^\+[0-9]{2}.[0-9 ]+$/',
        '/^\+[0-9 ]+ ; \+[0-9 ]+$/',
        '/^\+ \([0-9]{3}\) [0-9 ]+$/',
        '/^\+[0-9- ]+ ext. [0-9]{3}$/',
        '/^[0-9]{2} \([0-9]\)[0-9 ]+$/',
        '/^\+[0-9]{3} [0-9]{4}\+[0-9]{4}$/',
        '/^\+[0-9 ]+, \+[0-9 ]+, \+[0-9 ]+$/',
        '/^\+[0-9] \([0-9]{4}\) [0-9-]+, [0-9-]+$/',
        '/^\+[0-9] \([0-9]{3}\)[0-9]+., \+\([0-9]{3}\) [0-9]+$/',
        '/^\+[0-9]{3} \([0-9]\) [0-9 ]+, \+[0-9]{3} \([0-9]\) [0-9 ]+$/'
    );

    foreach ($patterns as $pattern) {
        // None of the patterns has a capture group, so a return value of 1
        // is the same condition as "exactly one entry in the match array".
        if (preg_match($pattern, $str) === 1) {
            return true;
        }
    }
    return false;
}
|||
|
|||
/**
 * Split a saved TLD delegation-record page into its raw HTML fragments.
 *
 * The page is cut down to the part between 'main_right">' and the left
 * sidebar / first <script>, then divided on "<h2>" into the expected
 * sections. Any deviation from the expected layout terminates the script
 * through die() with an "error -> ..." message.
 *
 * @param string $htmlFile Path of the HTML file to read.
 * @return array Trimmed fragments keyed by 'title', 'type', 'manager',
 *               'admin', 'tech', 'ns', 'info', 'date' and, when the page has
 *               a seventh section, 'report'.
 */
function splitHtml($htmlFile) {
    // Get core part
    $html = file_get_contents($htmlFile);
    if ($html === false) {
        die('error -> html');
    }
    $parts = explode('main_right">', $html);
    if (count($parts) < 2) {
        die('error -> html');
    }
    $html = explode('<div id="sidebar_left', $parts[1])[0];
    $html = explode('<script>', $html)[0];

    $result = array();

    // Get title: the pattern is greedy, so it spans from the first <h1> to
    // the last </h1> in the fragment.
    if (preg_match('/<h1>[\s\S]+<\/h1>/', $html, $match) !== 1) {
        die('error -> title');
    }
    $match = trim($match[0]);
    $result['title'] = substr($match, 4, strlen($match) - 9); // drop <h1> and </h1>

    // Get domain type: first <p>...</p>
    if (preg_match('/<p>[\s\S]+?<\/p>/', $html, $match) !== 1) {
        die('error -> domain');
    }
    $match = trim($match[0]);
    $result['type'] = substr($match, 3, strlen($match) - 7); // drop <p> and </p>

    // Index 0 is everything before the first <h2>; 1..5 (or 1..6) are sections.
    $html = explode('<h2>', $html);
    $sectionCount = count($html);
    if ($sectionCount !== 6 && $sectionCount !== 7) {
        die('error -> html');
    }

    // Get manager
    $manager = trim($html[1]);
    if (strpos($manager, 'Sponsoring Organisation') !== 0 && strpos($manager, 'ccTLD Manager') !== 0) {
        die('error -> manager');
    }
    $result['manager'] = splitHtmlSectionBody($manager, 'error -> manager');

    // Get admin contact
    $admin = trim($html[2]);
    if (strpos($admin, 'Administrative Contact') !== 0) {
        die('error -> admin contact');
    }
    $result['admin'] = splitHtmlSectionBody($admin, 'error -> admin contact');

    // Get tech contact
    $tech = trim($html[3]);
    if (strpos($tech, 'Technical Contact') !== 0) {
        die('error -> tech contact');
    }
    $result['tech'] = splitHtmlSectionBody($tech, 'error -> tech contact');

    // Get nameserver
    $ns = trim($html[4]);
    if (strpos($ns, 'Name Servers') !== 0) {
        die('error -> name server');
    }
    if (preg_match('/This domain is not present in the root zone at this time./', $ns) === 1) {
        $result['ns'] = '';
    } else {
        $tableParts = explode('<tbody>', $ns);
        if (count($tableParts) < 2) {
            die('error -> name server');
        }
        $result['ns'] = trim(explode('</tbody>', $tableParts[1])[0]);
    }

    // Get registry info
    $info = trim($html[5]);
    if (strpos($info, 'Registry Information') !== 0) {
        die('error -> registry info');
    }
    $info = splitHtmlSectionBody($info, 'error -> registry info');
    // Compare against false explicitly: a match at offset 0 is still a match.
    if (strpos($info, '<i>') !== false) {
        // The date line sits inside this section, so there is no report section.
        if ($sectionCount !== 6) {
            die('error -> registry info');
        }
        $pieces = explode('<i>', $info);
        $date = trim($pieces[1]);
        $info = trim($pieces[0]);
        if (substr($info, -3) !== '<p>') {
            die('error -> registry info');
        }
        $result['info'] = trim(substr($info, 0, strlen($info) - 3));
    } else {
        if ($sectionCount !== 7) {
            die('error -> registry info');
        }
        if (substr($info, -4) !== '</p>') {
            die('error -> registry info');
        }
        $result['info'] = $info;

        // Get report
        $report = trim($html[6]);
        if (strpos($report, 'IANA Reports') !== 0) {
            die('error -> report');
        }
        $pieces = explode('<i>', $report);
        if (count($pieces) < 2) {
            die('error -> date');
        }
        $date = trim($pieces[1]);
        if (preg_match('/<ul>[\s\S]+<\/ul>/', $report, $match) !== 1) {
            die('error -> report');
        }
        // The match always starts with <ul> and ends with </ul>; keep the inside.
        $result['report'] = trim(substr(trim($match[0]), 4, -5));
    }

    // Get date
    if (substr($date, -8) !== '</i></p>') {
        die('error -> date');
    }
    $result['date'] = trim(substr($date, 0, strlen($date) - 8));

    foreach (array('title', 'type', 'manager', 'admin', 'tech', 'ns', 'info', 'date') as $key) {
        if (!isset($result[$key])) {
            die('error -> result');
        }
    }

    // array_map keeps the string keys and avoids a dangling foreach reference.
    return array_map('trim', $result);
}

/**
 * Return the trimmed content that follows the "</h2>" of one page section.
 *
 * @param string $section Section text starting with its heading.
 * @param string $error   Message passed to die() when "</h2>" is missing.
 * @return string Section body without the heading.
 */
function splitHtmlSectionBody($section, $error) {
    $pieces = explode('</h2>', $section);
    if (count($pieces) < 2) {
        die($error);
    }
    return trim($pieces[1]);
}
|||
|
|||
/**
 * Extract the TLD (with its leading dot) from a page title and return it in
 * Punycode form.
 *
 * The title must start with "Delegation Record for ." once the
 * force-rtl <span> wrapper has been removed; otherwise the script dies.
 *
 * @param string $str Title text as returned by splitHtml().
 * @return string Encoded TLD including the leading dot.
 */
function getHtmlTitle($str) {
    $str = str_replace(array('<span class="force-rtl">', '</span>'), '', $str);
    if (strpos($str, 'Delegation Record for .') !== 0) {
        die('error analyse -> title');
    }
    // Skip the 22 characters of "Delegation Record for " and keep the dot.
    $str = substr($str, 22);
    $str = (new Punycode)->encode($str);
    // Special case: this one encoder result is replaced by hand.
    // NOTE(review): presumably the title's casing makes the encoder produce
    // a label that differs from the delegated one - confirm.
    if ($str === '.xn--l4fe') {
        return '.xn--node';
    }
    return $str;
}
|||
|
|||
/**
 * Translate the type line of a delegation record into a short label.
 *
 * Dies with 'error analyse -> type' when the text is not a known type.
 *
 * @param string $str Type text including the surrounding parentheses.
 * @return string Short label such as 'gTLD', 'ccTLD' or 'ccTLD for XX'.
 */
function getHtmlType($str) {
    // Capture the country code instead of slicing the last three characters,
    // which only worked when nothing followed the closing parenthesis.
    $pattern = '/^\(Country-code top-level domain designated for two-letter country code ([A-Z]{2})\)/';
    if (preg_match($pattern, $str, $match) === 1) {
        return 'ccTLD for ' . $match[1];
    }

    $labels = array(
        '(Generic top-level domain)' => 'gTLD',
        '(Country-code top-level domain)' => 'ccTLD',
        '(Sponsored top-level domain)' => 'sTLD',
        '(Infrastructure top-level domain)' => 'Infrastructure TLD',
        '(Restricted generic top-level domain)' => 'Restricted TLD',
        '(Test top-level domain)' => 'TLD for test'
    );
    if (!is_string($str) || !isset($labels[$str])) {
        die('error analyse -> type');
    }
    return $labels[$str];
}
|||
|
|||
/**
 * Extract the TLD manager (owner) name and address lines.
 *
 * Expected input: "<b>Name</b><br/>line<br/>line<br/>...". Dies with
 * 'error analyse -> manager' when the layout does not match.
 *
 * NOTE(review): the key set differs between paths - 'manager_info' (empty
 * string) when there is no address, 'manager_addr' (list of lines) when
 * there is one. Kept unchanged for callers; confirm which key is intended.
 *
 * @param string $str Manager fragment as returned by splitHtml().
 * @return array 'manager' plus either 'manager_info' or 'manager_addr'.
 */
function getHtmlManager($str) {
    $unassigned = array(
        'manager' => '',
        'manager_info' => ''
    );
    if ($str === '' || $str === null) {
        return $unassigned;
    }

    $temp = explode('</b><br/>', $str);
    if (count($temp) !== 2) {
        die('error analyse -> manager');
    }

    // Drop the leading "<b>". A plain positive offset also yields '' when the
    // part is exactly "<b>"; the former negative offset returned it whole.
    $manager = (string) substr(trim($temp[0]), 3);
    if ($manager === 'Not assigned') {
        return $unassigned;
    }
    if ($temp[1] === '') {
        return array(
            'manager' => $manager,
            'manager_info' => ''
        );
    }

    // Normalise <br> to <br/> and keep every non-empty line.
    $manager_addr = array();
    foreach (explode('<br/>', str_replace('<br>', '<br/>', trim($temp[1]))) as $line) {
        $line = trim($line);
        if ($line !== '') {
            $manager_addr[] = $line;
        }
    }
    if (count($manager_addr) === 0) {
        die('error analyse -> manager');
    }

    return array(
        'manager' => $manager,
        'manager_addr' => $manager_addr
    );
}
|||
|
|||
/**
 * Extract a contact record (administrative or technical).
 *
 * Expected input: "<b>Name</b>" followed by organisation/address lines,
 * then "<b>Email:</b>" and optionally "<b>Voice:</b>" and/or "<b>Fax:</b>",
 * every value terminated by "<br/>". Dies with 'error analyse -> contact'
 * on any deviation, including an invalid e-mail, voice or fax value.
 *
 * @param string $str Contact fragment as returned by splitHtml().
 * @return array Empty array for an empty or "Not assigned" contact;
 *               otherwise 'name', 'org', 'addr' (list of lines), 'email',
 *               'voice' and 'fax' ('' when absent).
 */
function getHtmlContact($str) {
    if ($str === '') {
        return array();
    }

    preg_match_all('/<b>[\s\S]+?<\/b>/', $str, $match);
    $labels = $match[0];
    $labelCount = count($labels);
    if ($labelCount === 1 && $labels[0] === '<b>Not assigned</b>') {
        return array();
    }
    if ($labelCount < 2 || $labelCount > 4) {
        die('error analyse -> contact');
    }
    if ($labels[1] !== '<b>Email:</b>') {
        die('error analyse -> contact');
    }

    // Positions of the labels inside $labels; -1 means "not present".
    $email = 1;
    $voice = -1;
    $fax = -1;
    if (isset($labels[2])) {
        if ($labels[2] === '<b>Voice:</b>') {
            $voice = 2;
        } else if ($labels[2] === '<b>Fax:</b>') {
            $fax = 2;
        } else {
            die('error analyse -> contact');
        }
    }
    if (isset($labels[3])) {
        if ($labels[3] === '<b>Fax:</b>') {
            $fax = 3;
        } else {
            die('error analyse -> contact');
        }
    }

    // Name is the first label without its <b>...</b> wrapper.
    $result['name'] = trim(substr($labels[0], 3, -4));

    // Replace every label by a marker and split on it: piece 0 must be empty
    // (the text starts with the name label); piece k+1 follows label k.
    $contact = explode('|_|-|_|', preg_replace('/<b>[\s\S]+?<\/b>/', '|_|-|_|', $str));
    if ($contact[0] !== '') {
        die('error analyse -> contact');
    }
    $pieceCount = count($contact);
    if ($pieceCount < 3 || $pieceCount > 5) {
        die('error analyse -> contact');
    }

    // Organisation and address lines follow the name. A fresh list is built
    // instead of unsetting through a by-reference foreach, whose dangling
    // reference was later reused as a by-value loop variable.
    $lines = array();
    foreach (explode('<br/>', str_replace('<br>', '<br/>', trim($contact[1]))) as $line) {
        $line = trim($line);
        if ($line !== '') {
            $lines[] = $line;
        }
    }
    if (count($lines) < 2) {
        die('error analyse -> contact');
    }
    $result['org'] = array_shift($lines);
    $result['addr'] = $lines;

    $result['email'] = '';
    $result['voice'] = '';
    $result['fax'] = '';
    for ($index = 1; $index < $pieceCount; $index++) {
        $row = trim($contact[$index]);
        if (substr($row, -5) !== '<br/>') {
            die('error analyse -> contact');
        }
        $value = substr($row, 0, -5); // strip the trailing <br/>
        if ($index - 1 === $email) {
            $result['email'] = $value;
        }
        if ($index - 1 === $voice) {
            $result['voice'] = $value;
        }
        if ($index - 1 === $fax) {
            $result['fax'] = $value;
        }
    }

    // Placeholders used for "no fax".
    if (in_array($result['fax'], array('n/a', 'NA', 'N/A', '-'), true)) {
        $result['fax'] = '';
    }
    // Drop a stray leading "]" from the voice value.
    if (substr($result['voice'], 0, 1) === ']') {
        $result['voice'] = trim((string) substr($result['voice'], 1));
    }

    if ($result['email'] != '' && !filter_var($result['email'], FILTER_VALIDATE_EMAIL)) {
        die('error analyse -> contact');
    }
    if ($result['voice'] != '' && !isVoice($result['voice'])) {
        die('error analyse -> contact');
    }
    if ($result['fax'] != '' && !isVoice($result['fax'])) {
        die('error analyse -> contact');
    }
    return $result;
}
|||
|
|||
/**
 * Extract the TLD name servers and their IP addresses.
 *
 * Each <tr> must contain at least two <td> cells: the host name and a
 * "<br/>"-separated list of IP addresses. Dies with 'error analyse -> ns'
 * when a row is malformed, an address is invalid, a row has no address or
 * the host name is not accepted by isDomain().
 *
 * @param string $str Table body fragment as returned by splitHtml().
 * @return array Map of host name => list of IP strings; empty array for ''.
 */
function getHtmlNS($str) {
    if ($str === '') {
        return array();
    }

    preg_match_all('/<tr>[\s\S]+?<\/tr>/', $str, $match);
    $rows = $match[0];
    if (count($rows) === 0) {
        die('error analyse -> ns');
    }

    $ns = array();
    foreach ($rows as $row) {
        $row = trim(str_replace(array('<tr>', '</tr>'), '', $row));

        // Separate variable: the row list being iterated is not overwritten.
        preg_match_all('/<td>[\s\S]+?<\/td>/', $row, $cellMatch);
        $cells = $cellMatch[0];
        // Both the host cell and the address cell are read below.
        if (count($cells) < 2) {
            die('error analyse -> ns');
        }
        $host = trim(str_replace(array('<td>', '</td>'), '', $cells[0]));
        $addresses = trim(str_replace(array('<td>', '</td>'), '', $cells[1]));

        $ips = array();
        foreach (explode('<br/>', $addresses) as $ip) {
            if ($ip == '') {
                continue;
            }
            if (!filter_var($ip, FILTER_VALIDATE_IP)) {
                die('error analyse -> ns');
            }
            $ips[] = $ip;
        }
        if (count($ips) === 0) {
            die('error analyse -> ns');
        }
        if (!isDomain($host)) {
            die('error analyse -> ns');
        }
        $ns[$host] = $ips;
    }
    return $ns;
}
|||
|
|||
/**
 * Extract the registration website and the WHOIS server.
 *
 * The input must contain exactly one <p>...</p>. Inside it an optional
 * "<b>URL for registration services:</b>" label is followed by a link, and
 * an optional "<b>WHOIS Server:</b>" label by a host name. Dies with
 * 'error analyse -> info' on any deviation, on an invalid URL, or on a
 * WHOIS value rejected by isDomain().
 *
 * @param string $str Registry-information fragment from splitHtml().
 * @return array 'website' and 'whois', each '' when absent.
 */
function getHtmlInfo($str) {
    // Every failure here used to report 'contact' after the first few
    // checks (copy-paste); the message now names this function's field.
    $error = 'error analyse -> info';

    preg_match_all('/<p>[\s\S]+?<\/p>/', $str, $match);
    if (count($match[0]) !== 1) {
        die($error);
    }
    $str = trim($match[0][0]);
    $str = trim(substr($str, 0, strlen($str) - 4)); // drop </p>
    if ($str === '<p>') {
        // Empty paragraph: nothing published.
        return array(
            'website' => '',
            'whois' => ''
        );
    }
    $str = trim(substr($str, 3)); // drop <p>

    preg_match_all('/<b>[\s\S]+?<\/b>/', $str, $match);
    $labels = $match[0];

    // Positions of the labels inside $labels; -1 means "not present".
    $website = -1;
    $whois = -1;
    if (!isset($labels[0])) {
        die($error);
    }
    if ($labels[0] === '<b>URL for registration services:</b>') {
        $website = 0;
    } else if ($labels[0] === '<b>WHOIS Server:</b>') {
        $whois = 0;
    } else {
        die($error);
    }
    if (isset($labels[1])) {
        if ($labels[1] !== '<b>WHOIS Server:</b>') {
            die($error);
        }
        $whois = 1;
    }

    // Replace the labels by a marker and split: piece k+1 follows label k.
    $fields = explode('|_|-|_|', preg_replace('/<b>[\s\S]+?<\/b>/', '|_|-|_|', $str));
    if ($fields[0] !== '') {
        die($error);
    }
    $fieldCount = count($fields);
    if ($fieldCount !== 3 && $fieldCount !== 2) {
        die($error);
    }

    $websiteValue = null;
    $whoisValue = null;
    for ($index = 1; $index < $fieldCount; $index++) {
        $row = trim($fields[$index]);
        if ($index - 1 === $website) {
            if (preg_match('/^<a href="[\s\S]+<\/a>/', $row, $match) !== 1) {
                die($error);
            }
            // Link text: everything between the first ">" and the last "<".
            if (preg_match('/>[\s\S]+</', $match[0], $match) !== 1) {
                die($error);
            }
            $websiteValue = trim(substr($match[0], 1, -1));
        }
        if ($index - 1 === $whois) {
            $whoisValue = $row;
        }
    }

    if ($websiteValue !== null && !filter_var($websiteValue, FILTER_VALIDATE_URL)) {
        die($error);
    }
    if ($whoisValue !== null && !isDomain($whoisValue)) {
        die($error);
    }

    return array(
        'website' => $websiteValue === null ? '' : $websiteValue,
        'whois' => $whoisValue === null ? '' : $whoisValue
    );
}
|||
|
|||
/**
 * Extract the registration date and the last-update date of a TLD.
 *
 * Dies with 'error analyse -> date' when no "Record last updated" date is
 * found. When no "Registration date" is found, the update date is returned
 * for both fields (the original comment says this happens only for the
 * "eh" TLD).
 *
 * @param string $str Date line as returned by splitHtml().
 * @return array 'update' and 'regist', both formatted YYYY-MM-DD.
 */
function getHtmlDate($str) {
    // Capture groups replace the former "last 11 characters, then first 10"
    // slicing; the trailing "." still matches any single character.
    if (preg_match('/Record last updated ([0-9]{4}-[0-9]{2}-[0-9]{2})./', $str, $match) !== 1) {
        die('error analyse -> date');
    }
    $update = $match[1];

    $regist = $update;
    if (preg_match('/Registration date ([0-9]{4}-[0-9]{2}-[0-9]{2})./', $str, $match) === 1) {
        $regist = $match[1];
    }

    return array(
        'update' => $update,
        'regist' => $regist
    );
}
|||
|
|||
?> |
@ -0,0 +1,44 @@ |
|||
<?php |
|||
|
|||
/**
 * Parse the saved delegation-record page of every TLD in the list.
 *
 * For each TLD the file "<htmlDir><tld without leading dot>.html" is split
 * and analysed. The script dies when the title of a page does not encode to
 * the TLD it was loaded for, or when any of the helpers rejects its input.
 *
 * @param array  $tldList TLDs with a leading dot, e.g. '.com'.
 * @param string $htmlDir Directory prefix of the saved pages.
 * @return array Map of TLD => parsed record.
 */
function getTldsInfo($tldList, $htmlDir) {
    $data = array(); // defined even when the list is empty
    foreach ($tldList as $tld) {
        $html = splitHtml($htmlDir . substr($tld, 1) . '.html');
        unset($html['report']);
        if (getHtmlTitle($html['title']) !== $tld) {
            die('error analyse -> title');
        }

        // Start from a fresh record for every TLD. "+=" never overwrites
        // existing keys, so a reused $info kept the first TLD's manager
        // fields for every following TLD.
        $info = array('type' => getHtmlType($html['type']));
        $info += getHtmlManager($html['manager']);
        $info['admin_contact'] = getHtmlContact($html['admin']);
        $info['tech_contact'] = getHtmlContact($html['tech']);
        $info['nameserver'] = getHtmlNS($html['ns']);

        $web = getHtmlInfo($html['info']);
        $info['website'] = $web['website'];
        $info['whois'] = $web['whois'];

        $date = getHtmlDate($html['date']);
        $info['last_updated'] = $date['update'];
        $info['regist_date'] = $date['regist'];

        $data[$tld] = $info;
    }
    return $data;
}
|||
|
|||
/**
 * Read every TLD listed in a saved copy of the IANA root zone database page.
 *
 * The text of the link in each table row is taken as the TLD and encoded
 * with Punycode. Dies with 'error -> tld list' when the file cannot be read
 * or a row has no link.
 *
 * @param string $htmlFile Path of the saved list page.
 * @return array List of encoded TLDs in page order.
 */
function getIanaTlds($htmlFile) {
    $html = file_get_contents($htmlFile);
    if ($html === false) {
        die('error -> tld list');
    }
    // 'tbody>' matches both <tbody> and </tbody>; piece 1 is the table body.
    $parts = explode('tbody>', $html);
    if (count($parts) < 2) {
        die('error -> tld list');
    }
    $rows = explode('</tr>', $parts[1]);
    array_pop($rows); // what follows the last </tr> is not a row

    $punycode = new Punycode;
    $tlds = array();
    foreach ($rows as $row) {
        if (preg_match('/<a [\s\S]+<\/a>/', $row, $match) !== 1) {
            die('error -> tld list');
        }
        // Link text: everything between the first ">" and the last "<".
        if (preg_match('/>[\s\S]+</', $match[0], $match) !== 1) {
            die('error -> tld list');
        }
        $tld = substr($match[0], 1, strlen($match[0]) - 2);

        // Right-to-left names are wrapped in two 8-character HTML entities.
        // The compared literal must therefore be the 8-character entity, not
        // the decoded mark, or the 8-byte prefix can never be equal to it.
        // NOTE(review): assumes the closing wrapper is also an 8-character
        // entity, as the "strip 16" arithmetic implies - confirm on the page.
        if (strcasecmp(substr($tld, 0, 8), '&#x200F;') === 0) {
            $tld = substr($tld, 8, strlen($tld) - 16);
        }
        $tlds[] = $punycode->encode($tld);
    }
    return $tlds;
}
|||
|
|||
?> |
@ -0,0 +1,27 @@ |
|||
<?php |
|||
|
|||
require_once './load.php'; |
|||
require_once './analyse.php'; |
|||
require_once './punycode.php'; |
|||
|
|||
// function writeFile($filename, $data) { |
|||
// $file = fopen($filename, 'w'); |
|||
// fwrite($file, $data); |
|||
// fclose($file); |
|||
// } |
|||
|
|||
// $data = getIanaTlds('../html/main.html'); // https://www.iana.org/domains/root/db |
|||
// // writeFile('tlds.txt', implode(PHP_EOL, $data) . PHP_EOL); |
|||
// $urls = ''; |
|||
// foreach ($data as $tld) { |
|||
// $urls .= 'https://www.iana.org/domains/root/db/'; |
|||
// $urls .= substr($tld, 1, strlen($tld) - 1); |
|||
// $urls .= '.html' . PHP_EOL; |
|||
// } |
|||
// writeFile('urls.txt', $urls); |
|||
|
|||
// Read the TLD list from the saved root zone database page, parse the saved
// page of every TLD, and print how many records were parsed.
$tlds = getIanaTlds('../html/main.html');
$data = getTldsInfo($tlds, '../html/tlds/');
echo count($data);
|||
|
|||
?> |
@ -0,0 +1,361 @@ |
|||
<?php |
|||
|
|||
/**
 * Punycode implementation as described in RFC 3492
 *
 * Problems are not thrown: encode() and decode() set $errFlag / $errMsg and
 * still return their result, so callers that care must check $errFlag.
 *
 * @link http://tools.ietf.org/html/rfc3492
 */
class Punycode {
    /**
     * Bootstring parameter values
     */
    const BASE = 36;
    const TMIN = 1;
    const TMAX = 26;
    const SKEW = 38;
    const DAMP = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N = 128;
    const PREFIX = 'xn--';
    const DELIMITER = '-';

    /**
     * Encode table (digit value => character)
     *
     * @var array
     */
    protected static $encodeTable = array(
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
        'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    );

    /**
     * Decode table (character => digit value)
     *
     * @var array
     */
    protected static $decodeTable = array(
        'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
        'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
        'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
        's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
        'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
        '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
    );

    /**
     * Character encoding passed to the mb_* functions
     *
     * @var string
     */
    protected $encoding;

    /**
     * Set to true by encode()/decode() when a length limit is violated.
     *
     * @var bool
     */
    public $errFlag;

    /**
     * Description of the most recent violation, '' when there was none.
     *
     * @var string
     */
    public $errMsg;

    /**
     * Constructor
     *
     * @param string $encoding Character encoding
     */
    public function __construct($encoding = 'UTF-8') {
        $this->encoding = $encoding;
        $this->errFlag = false;
        $this->errMsg = '';
    }

    /**
     * Encode a domain to its Punycode version
     *
     * An empty label or a result longer than 255 octets sets $errFlag but
     * does not stop the encoding.
     *
     * @param string $input Domain name in Unicode to be encoded
     * @return string Punycode representation in ASCII
     */
    public function encode($input) {
        $input = mb_strtolower($input, $this->encoding);
        $parts = explode('.', $input);
        // Index-based write-back instead of a by-reference loop variable.
        foreach ($parts as $index => $part) {
            $length = strlen($part);
            if ($length < 1) {
                $this->errFlag = true;
                $this->errMsg = 'The length of any one label is limited to between 1 and 63 octets, but ' . $length . ' given.';
            }
            $parts[$index] = $this->encodePart($part);
        }
        $output = implode('.', $parts);
        $length = strlen($output);
        if ($length > 255) {
            $this->errFlag = true;
            $this->errMsg = 'A full domain name is limited to 255 octets (including the separators), ' . $length . ' given.';
        }

        return $output;
    }

    /**
     * Encode a part of a domain name, such as tld, to its Punycode version
     *
     * A label made of basic (ASCII) code points only is returned unchanged
     * and without the "xn--" prefix.
     *
     * @param string $input Part of a domain name
     * @return string Punycode representation of a domain part
     */
    protected function encodePart($input) {
        $codePoints = $this->listCodePoints($input);

        $n = static::INITIAL_N;
        $bias = static::INITIAL_BIAS;
        $delta = 0;
        $h = $b = count($codePoints['basic']);

        $output = '';
        foreach ($codePoints['basic'] as $code) {
            $output .= $this->codePointToChar($code);
        }
        if ($input === $output) {
            return $output;
        }
        if ($b > 0) {
            $output .= static::DELIMITER;
        }

        $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
        sort($codePoints['nonBasic']);

        $i = 0;
        $length = mb_strlen($input, $this->encoding);
        while ($h < $length) {
            $m = $codePoints['nonBasic'][$i++];
            $delta = $delta + ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($codePoints['all'] as $c) {
                if ($c < $n || $c < static::INITIAL_N) {
                    $delta++;
                }
                if ($c === $n) {
                    $q = $delta;
                    for ($k = static::BASE;; $k += static::BASE) {
                        $t = $this->calculateThreshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }

                        $code = $t + (($q - $t) % (static::BASE - $t));
                        $output .= static::$encodeTable[$code];

                        // Integer division: a float here would later be used
                        // as an array offset and as a "%" operand, which is
                        // a deprecated implicit conversion in PHP 8.1+.
                        $q = (int) (($q - $t) / (static::BASE - $t));
                    }

                    $output .= static::$encodeTable[$q];
                    $bias = $this->adapt($delta, $h + 1, ($h === $b));
                    $delta = 0;
                    $h++;
                }
            }

            $delta++;
            $n++;
        }
        $out = static::PREFIX . $output;
        $length = strlen($out);
        if ($length > 63 || $length < 1) {
            $this->errFlag = true;
            $this->errMsg = 'The length of any one label is limited to between 1 and 63 octets, but ' . $length . ' given.';
        }

        return $out;
    }

    /**
     * Decode a Punycode domain name to its Unicode counterpart
     *
     * Labels without the "xn--" prefix are passed through unchanged.
     *
     * @param string $input Domain name in Punycode
     * @return string Unicode domain name
     */
    public function decode($input) {
        $input = strtolower($input);
        $parts = explode('.', $input);
        foreach ($parts as $index => $part) {
            $length = strlen($part);
            if ($length > 63 || $length < 1) {
                $this->errFlag = true;
                $this->errMsg = 'The length of any one label is limited to between 1 and 63 octets, but ' . $length . ' given.';
            }
            if (strpos($part, static::PREFIX) !== 0) {
                continue;
            }

            $part = substr($part, strlen(static::PREFIX));
            $parts[$index] = $this->decodePart($part);
        }
        $output = implode('.', $parts);
        $length = strlen($output);
        if ($length > 255) {
            $this->errFlag = true;
            $this->errMsg = 'A full domain name is limited to 255 octets (including the separators), ' . $length . ' given.';
        }

        return $output;
    }

    /**
     * Decode a part of domain name, such as tld
     *
     * @param string $input Part of a domain name, without the "xn--" prefix
     * @return string Unicode domain part
     */
    protected function decodePart($input) {
        $n = static::INITIAL_N;
        $i = 0;
        $bias = static::INITIAL_BIAS;
        $output = '';

        // Everything before the last delimiter is copied literally.
        $pos = strrpos($input, static::DELIMITER);
        if ($pos !== false) {
            $output = substr($input, 0, $pos++);
        } else {
            $pos = 0;
        }

        $outputLength = strlen($output);
        $inputLength = strlen($input);
        while ($pos < $inputLength) {
            $oldi = $i;
            $w = 1;

            for ($k = static::BASE;; $k += static::BASE) {
                $digit = static::$decodeTable[$input[$pos++]];
                $i = $i + ($digit * $w);
                $t = $this->calculateThreshold($k, $bias);

                if ($digit < $t) {
                    break;
                }

                $w = $w * (static::BASE - $t);
            }

            $bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
            $n = $n + (int) ($i / $outputLength);
            $i = $i % ($outputLength);
            // Insert the decoded character at position $i.
            $output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

            $i++;
        }

        return $output;
    }

    /**
     * Calculate the bias threshold to fall between TMIN and TMAX
     *
     * @param integer $k
     * @param integer $bias
     * @return integer
     */
    protected function calculateThreshold($k, $bias) {
        if ($k <= $bias + static::TMIN) {
            return static::TMIN;
        } elseif ($k >= $bias + static::TMAX) {
            return static::TMAX;
        }
        return $k - $bias;
    }

    /**
     * Bias adaptation
     *
     * @param integer $delta
     * @param integer $numPoints
     * @param boolean $firstTime
     * @return integer
     */
    protected function adapt($delta, $numPoints, $firstTime) {
        $delta = (int) (
            ($firstTime)
                ? $delta / static::DAMP
                : $delta / 2
        );
        $delta += (int) ($delta / $numPoints);

        $k = 0;
        while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2) {
            $delta = (int) ($delta / (static::BASE - static::TMIN));
            $k = $k + static::BASE;
        }
        $k = $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

        return $k;
    }

    /**
     * List code points for a given input
     *
     * @param string $input
     * @return array Multi-dimension array with basic, non-basic and aggregated code points
     */
    protected function listCodePoints($input) {
        $codePoints = array(
            'all' => array(),
            'basic' => array(),
            'nonBasic' => array(),
        );

        $length = mb_strlen($input, $this->encoding);
        for ($i = 0; $i < $length; $i++) {
            $char = mb_substr($input, $i, 1, $this->encoding);
            $code = $this->charToCodePoint($char);
            if ($code < 128) {
                $codePoints['all'][] = $codePoints['basic'][] = $code;
            } else {
                $codePoints['all'][] = $codePoints['nonBasic'][] = $code;
            }
        }

        return $codePoints;
    }

    /**
     * Convert a single or multi-byte character to its code point
     *
     * The lead byte decides how many continuation bytes are read.
     *
     * @param string $char
     * @return integer
     */
    protected function charToCodePoint($char) {
        $code = ord($char[0]);
        if ($code < 128) {
            return $code;
        } elseif ($code < 224) {
            return (($code - 192) * 64) + (ord($char[1]) - 128);
        } elseif ($code < 240) {
            return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
        } else {
            return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
        }
    }

    /**
     * Convert a code point to its single or multi-byte character
     *
     * @param integer $code
     * @return string
     */
    protected function codePointToChar($code) {
        if ($code <= 0x7F) {
            return chr($code);
        } elseif ($code <= 0x7FF) {
            return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
        } elseif ($code <= 0xFFFF) {
            return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        } else {
            return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        }
    }
}
Loading…
Reference in new issue