
Добавил:
rus2an
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз:
Предмет:
Файл:2к4с Управление данными / Выполненая работа / Курсовая работа / parser-php / parser-url-system
.php<?php
include_once('lib/curl_query.php');
include_once('lib/simple_html_dom.php');
// Connection settings for the MySQL database that receives the scraped rows.
// They are consumed further down when the PDO handle is created.
$servername = 'localhost';
$dbname = 'arezvov1ya_pars';

// Credentials for the account used to open that connection.
$user = 'arezvov1ya_pars';
$pass = 'parserSQL!';
// Download pages 1..5 of the TOP500 November 2018 list. For each page keep the
// HTML between the table header end ('</thead>') and the table end
// ('</table>'), with every '<a href="' rewritten to an absolute top500.org URL.
//
// $result[1] .. $result[5] are always set: a page that cannot be downloaded or
// does not contain the expected markers contributes an empty string, so the
// code that later concatenates the five fragments keeps working.
$result = array();
for ($a = 1; $a <= 5; $a++) {
    $result[$a] = '';

    $listUrl = "https://www.top500.org/list/2018/11/?page=" . $a;
    $pageHtml = file_get_contents($listUrl);
    if ($pageHtml === false) {
        error_log("parser: could not download list page " . $listUrl);
        continue;
    }

    // Look for the closing table tag only after the header marker, so an
    // unrelated table earlier in the page cannot produce a negative length.
    $bodyStart = strpos($pageHtml, '</thead>');
    $bodyEnd = ($bodyStart === false) ? false : strpos($pageHtml, "</table>", $bodyStart);
    if ($bodyStart === false || $bodyEnd === false) {
        error_log("parser: table markers not found on list page " . $listUrl);
        continue;
    }

    // Same slice bounds as before: starts one character ahead of '</thead>'
    // and stops two characters short of '</table>'.
    $rowsHtml = substr($pageHtml, $bodyStart - 1, $bodyEnd - $bodyStart - 1);
    $result[$a] = preg_replace('/<a href="/', '<a href="https://www.top500.org', $rowsHtml);
}
// Join the per-page fragments (keys 1..5, in page order) into one HTML string.
$full_result = implode('', $result);

// Collect every absolute system-page URL. The capture stops at the first
// closing quote ([^"]*) so that further attributes on the same line cannot be
// swallowed into the URL, and the dots of the host name are matched literally.
preg_match_all('/(https:\/\/www\.top500\.org\/system\/[^"]*)"/i', $full_result, $match);

// Zero-based list of system URLs, in the order they appear in the list pages.
$systemorg = $match[1];

// Database handle used to store the scraped systems.
$conn = new PDO("mysql:host=$servername;dbname=$dbname", $user, $pass);
// For every system URL collected above: download the system page, tag the
// value cells of its details table with CSS classes, read them through
// simple_html_dom, normalise the numeric fields and upsert one row into
// `systemorg2`.

// Columns written for each system. The INSERT column list, the VALUES
// placeholders and the ON DUPLICATE KEY UPDATE assignments are all generated
// from this single list so they cannot drift apart (previously `mathlibrary`
// was missing from the UPDATE part and `operatingsystem` was listed twice).
$columns = array(
    'namesite',
    'system',
    'systemurl',
    'manufacturer',
    'cores',
    'memory',
    'processor',
    'interconnect',
    'linkpackperformance',
    'theoreticalpeak',
    'nmax',
    'hpcg',
    'power',
    'powermeasurementlevel',
    'measuredcores',
    'operatingsystem',
    'compiler',
    'mathlibrary',
    'mpi',
    'urlsystem',
);

$columnSql = array();
$placeholderSql = array();
$updateSql = array();
foreach ($columns as $column) {
    $columnSql[] = "`$column`";
    $placeholderSql[] = ":$column";
    $updateSql[] = "`$column` = :$column";
}

// NOTE: each named placeholder appears twice (VALUES and UPDATE), exactly as
// in the original statement; PDO only allows that with emulated prepares.
$pdoQuery = "INSERT INTO `systemorg2` (" . implode(", ", $columnSql) . ")"
    . " VALUES (" . implode(", ", $placeholderSql) . ")"
    . " ON DUPLICATE KEY UPDATE " . implode(", ", $updateSql);

// Prepared once and reused for every system instead of once per iteration.
$pdoResult = $conn->prepare($pdoQuery);

// Regex => replacement pairs that add a class to the <td> following a known
// <th> label, so the cell can be selected as 'td.<class>' afterwards.
// preg_replace() applies the pairs one after another, in this order.
$cellTaggers = array(
    "/<th>Site:<\/th>\s+<td><a.+href=\"(.*?)\">(.*?)<\/a><\/td>/" => "<th>Site:</th><td class=\"site\"><a href=\"$1\">$2</a></td>",
    "/<th>System URL:<\/th>\s+<td><a.+href=\"(.*?)\">(.*?)<\/a><\/td>/" => "<th>System URL:</th><td class=\"systemurl\"><a href=\"$1\">$2</a></td>",
    "/<th>Manufacturer:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Manufacturer:</th><td class=\"manufacturer\">$1</td>",
    "/<th>Cores:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Cores:</th><td class=\"cores\">$1</td>",
    "/<th>Memory:<\/th>\s+<td>([^s]+.*?[^s]+)<\/td>/" => "<th>Memory:</th><td class=\"memory\">$1</td>",
    "/<th>Processor:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Processor:</th><td class=\"processor\">$1</td>",
    "/<th>Interconnect:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Interconnect:</th><td class=\"interconnect\">$1</td>",
    "/<th>Linpack Performance \(Rmax\)<\/th>\s+<td>(.*?)<\/td>/" => "<th>Linpack Performance (Rmax)</th><td class=\"linkpackperformance\">$1</td>",
    "/<th>Theoretical Peak \(Rpeak\)<\/th>\s+<td>(.*?)<\/td>/" => "<th>Theoretical Peak (Rpeak)</th><td class=\"theoreticalpeak\">$1</td>",
    "/<th>Nmax<\/th>\s+<td>(.*?)<\/td>/" => "<th>Nmax</th><td class=\"nmax\">$1</td>",
    "/<th>HPCG.*?<\/th>\s+<td>(.*?)<\/td>/" => "<th>HPCG [TFlop/s]</th><td class=\"hpcg\">$1</td>",
    "/<th>Power:<\/th>\s+<td>([^s]+.*?[^s]+)<\/td>/" => "<th>Power:</th><td class=\"power\">$1</td>",
    "/<th>Power Measurement Level:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Power Measurement Level:</th><td class=\"powermeasurementlevel\">$1</td>",
    "/<th>Measured Cores:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Measured Cores:</th><td class=\"measuredcores\">$1</td>",
    "/<th>MPI:<\/th>\s+<td>(.*?)<\/td>/" => "<th>MPI:</th><td class=\"mpi\">$1</td>",
    "/<th>Math Library:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Math Library:</th><td class=\"mathlibrary\">$1</td>",
    "/<th>Compiler:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Compiler:</th><td class=\"compiler\">$1</td>",
    "/<th>Operating System:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Operating System:</th><td class=\"operatingsystem\">$1</td>",
);
$tagPatterns = array_keys($cellTaggers);
$tagReplacements = array_values($cellTaggers);

// Plain text of the first element matching $selector, or null when the page
// has no such element (many systems omit optional rows).
$cellText = function ($dom, $selector) {
    $node = $dom->find($selector, 0);
    return is_object($node) ? $node->plaintext : null;
};

// Removes thousands separators; a missing value (null) becomes ''.
$stripCommas = function ($text) {
    return preg_replace("/\,/", "", (string) $text);
};

// Iterate over the URLs that were actually found rather than a fixed count:
// the old bound ($b < 501) read one index past a 500-entry list.
foreach ($systemorg as $urlsystem) {
    // Pause before each request, as before.
    sleep(5);

    $html2 = file_get_contents($urlsystem);
    if ($html2 === false) {
        error_log("parser: could not download system page " . $urlsystem);
        continue;
    }

    // Keep the part of the page from the breadcrumb up to the end of the
    // first table after it (same slice bounds as before).
    $start = strpos($html2, "<ul class=\"breadcrumb\">");
    $end = ($start === false) ? false : strpos($html2, "</table>", $start);
    if ($start === false || $end === false) {
        error_log("parser: details table not found on " . $urlsystem);
        continue;
    }
    $fragment = substr($html2, $start - 1, $end - $start - 1);
    $fragment = preg_replace($tagPatterns, $tagReplacements, $fragment);

    // str_get_html() returns false when it cannot build a document.
    $html = str_get_html($fragment);
    if (!is_object($html)) {
        error_log("parser: could not parse system page " . $urlsystem);
        continue;
    }

    $system = trim((string) $cellText($html, 'h1'), " \t.");
    $system = preg_replace("/\s{2,}/", " ", $system);

    // The site name is the text of the link inside the tagged cell; guard both
    // lookups so a page without a site row no longer aborts the whole run.
    $siteCell = $html->find('td.site', 0);
    $siteLink = is_object($siteCell) ? $siteCell->find('a', 0) : null;
    $namesite = is_object($siteLink) ? $siteLink->plaintext : null;

    $memory = preg_replace("/\s{2,}/", " ", (string) $cellText($html, 'td.memory'));
    $memory = trim($memory, " \t.");
    $memory = preg_replace("/ GB/", "", $memory);
    $memory = $stripCommas($memory);

    $linkpackperformance = preg_replace("/ TFlop\/s/", "", (string) $cellText($html, 'td.linkpackperformance'));
    $linkpackperformance = $stripCommas($linkpackperformance);

    $theoreticalpeak = preg_replace("/ TFlop\/s/", "", (string) $cellText($html, 'td.theoreticalpeak'));
    $theoreticalpeak = $stripCommas($theoreticalpeak);

    $power = trim((string) $cellText($html, 'td.power'), " \t.");
    $power = preg_replace("/\s{2,}/", " ", $power);
    $power = preg_replace("/ kW \(Submitted\)/", "", $power);
    $power = $stripCommas($power);

    $row = array(
        'namesite' => $namesite,
        'system' => $system,
        'systemurl' => $cellText($html, 'td.systemurl'),
        'manufacturer' => $cellText($html, 'td.manufacturer'),
        'cores' => $stripCommas($cellText($html, 'td.cores')),
        'memory' => $memory,
        'processor' => $cellText($html, 'td.processor'),
        'interconnect' => $cellText($html, 'td.interconnect'),
        'linkpackperformance' => $linkpackperformance,
        'theoreticalpeak' => $theoreticalpeak,
        'nmax' => $stripCommas($cellText($html, 'td.nmax')),
        'hpcg' => $stripCommas($cellText($html, 'td.hpcg')),
        'power' => $power,
        'powermeasurementlevel' => $cellText($html, 'td.powermeasurementlevel'),
        'measuredcores' => $stripCommas($cellText($html, 'td.measuredcores')),
        'operatingsystem' => $cellText($html, 'td.operatingsystem'),
        'compiler' => $cellText($html, 'td.compiler'),
        'mathlibrary' => $cellText($html, 'td.mathlibrary'),
        'mpi' => $cellText($html, 'td.mpi'),
        'urlsystem' => $urlsystem,
    );

    // Release the parsed document before the next iteration; all values have
    // been copied out as strings by now.
    $html->clear();
    unset($html);

    $params = array();
    foreach ($columns as $column) {
        $params[':' . $column] = $row[$column];
    }
    $pdoExec = $pdoResult->execute($params);
}
?>
Соседние файлы в папке parser-php