
Добавил:
rus2an
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз:
Предмет:
Файл:2к4с Управление данными / Выполненая работа / Курсовая работа / parser-php / parser-url-site.php
<?php
include_once('lib/curl_query.php');
include_once('lib/simple_html_dom.php');
// Database connection settings, consumed by the PDO connection further down.
// NOTE(review): the credentials are hard-coded in the source file; consider
// moving them to configuration that is kept out of version control.
$user = 'arezvov1ya_pars';
$pass = 'parserSQL!';
$servername = 'localhost';
$dbname = 'arezvov1ya_pars';

// Step 1: download the five pages of the November 2018 list and keep, for each
// page, the part of the HTML between "</thead>" and the "</table>" after it.
$baseUrl = 'https://www.top500.org';
$listPages = 5;
$tableBodies = [];
for ($a = 1; $a <= $listPages; $a++) {
    $listUrl = $baseUrl . '/list/2018/11/?page=' . $a;
    $htmllist = file_get_contents($listUrl);
    if ($htmllist === false) {
        // Skip a page that could not be downloaded instead of slicing `false`.
        error_log("Could not download list page: $listUrl");
        continue;
    }

    $start = strpos($htmllist, '</thead>');
    // Look for the closing tag only after the table head, so an earlier
    // table on the page cannot produce an end offset before the start.
    $end = ($start === false) ? false : strpos($htmllist, '</table>', $start);
    if ($start === false || $end === false) {
        error_log("Table markers not found on list page: $listUrl");
        continue;
    }

    $tableBody = substr($htmllist, $start, $end - $start);
    // Turn root-relative links into absolute ones so they can be fetched later.
    $tableBodies[] = str_replace('<a href="/', '<a href="' . $baseUrl . '/', $tableBody);
}

// Step 2: extract the absolute "/site/..." URLs from the collected rows.
// [^"]* stops at the closing quote of the attribute; the previous greedy ".*"
// could run on to the last quote on the same line.
$full_result = implode('', $tableBodies);
preg_match_all('/(https:\/\/www\.top500\.org\/site\/[^"]*)"/i', $full_result, $match);

// Zero-based list of unique site URLs (the same URL can occur in several rows).
$siteorg = array_values(array_unique($match[1]));
// Step 3: open the database connection. Exceptions are switched on so that a
// failing prepare/execute is reported instead of silently returning false.
$conn = new PDO("mysql:host=$servername;dbname=$dbname", $user, $pass);
$conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Upsert statement, prepared once and reused for every site page.
// The UPDATE part refers to the inserted values through VALUES(col) instead of
// repeating the named placeholders: PDO only accepts the same named placeholder
// more than once while emulated prepares are enabled.
$upsertSite = $conn->prepare(
    "INSERT INTO `siteorg` (
        `company`,
        `urlcompany`,
        `segment`,
        `city`,
        `country`,
        `urlsite`)
    VALUES (
        :company,
        :urlcompany,
        :segment,
        :city,
        :country,
        :urlsite)
    ON DUPLICATE KEY UPDATE
        `company` = VALUES(`company`),
        `urlcompany` = VALUES(`urlcompany`),
        `segment` = VALUES(`segment`),
        `city` = VALUES(`city`),
        `country` = VALUES(`country`),
        `urlsite` = VALUES(`urlsite`)"
);

// Returns the plain text of a simple_html_dom node, or null when the lookup
// that produced it found nothing.
$plainTextOf = function ($node) {
    return $node ? $node->plaintext : null;
};

// Only the first two collected site URLs are processed, as before
// (presumably a debugging limit - confirm before raising it).
$maxSites = 2;
$siteCount = min($maxSites, count($siteorg));

for ($b = 0; $b < $siteCount; $b++) {
    // Pause five seconds before every request (presumably to go easy on the
    // remote server).
    sleep(5);

    $urlsite = $siteorg[$b];
    $html2 = file_get_contents($urlsite);
    if ($html2 === false) {
        error_log("Could not download site page: $urlsite");
        continue;
    }

    // Keep the part of the page from the breadcrumb list up to the end of the
    // first table that follows it.
    $start = strpos($html2, '<ul class="breadcrumb">');
    $end = ($start === false) ? false : strpos($html2, '</table>', $start);
    if ($start === false || $end === false) {
        error_log("Expected markers not found on site page: $urlsite");
        continue;
    }
    $fragment = substr($html2, $start, $end - $start);

    // Tag the Segment/City/Country value cells with a CSS class so that they
    // can be selected by class below.
    foreach (['Segment', 'City', 'Country'] as $label) {
        $fragment = preg_replace(
            '/<th>' . $label . '<\/th><td>(.*?)<\/td>/',
            '<th>' . $label . '</th><td class="' . strtolower($label) . '">$1</td>',
            $fragment
        );
    }

    $html = str_get_html($fragment);
    if (!$html) {
        error_log("Could not parse site page: $urlsite");
        continue;
    }

    // Missing elements yield null instead of a fatal "call on null".
    $firstRow = $html->find('tr', 0);
    $row = [
        ':company'    => $plainTextOf($html->find('h1', 1)),
        ':urlcompany' => $plainTextOf($firstRow ? $firstRow->find('td', 0) : null),
        ':segment'    => $plainTextOf($html->find('td.segment', 0)),
        ':city'       => $plainTextOf($html->find('td.city', 0)),
        ':country'    => $plainTextOf($html->find('td.country', 0)),
        ':urlsite'    => $urlsite,
    ];

    // Release the parsed tree; simple_html_dom keeps internal references that
    // are only dropped by clear().
    $html->clear();
    unset($html);

    try {
        $upsertSite->execute($row);
    } catch (PDOException $e) {
        // One failing row should not abort the remaining sites.
        error_log("Could not store site $urlsite: " . $e->getMessage());
    }
}
?>
Соседние файлы в папке parser-php