Скачиваний:
8
Добавлен:
27.11.2024
Размер:
8.03 Кб
Скачать
<?php  
	include_once('lib/curl_query.php');
	include_once('lib/simple_html_dom.php');
	// Database connection settings used by the PDO connection further down.
	// Each value can be overridden through an environment variable so that the
	// credentials do not have to live in the source file; when the variable is
	// unset or empty the original built-in value is used, so existing
	// deployments keep working unchanged.
	// NOTE(review): the fallback password is still stored in plain text here —
	// consider rotating it and removing the literal once the environment
	// variables are in place.
	$user = getenv('PARSER_DB_USER') ?: 'arezvov1ya_pars';
	$pass = getenv('PARSER_DB_PASS') ?: 'parserSQL!';
	$servername = getenv('PARSER_DB_HOST') ?: 'localhost';
	$dbname = getenv('PARSER_DB_NAME') ?: 'arezvov1ya_pars';
	
// Download pages 1..5 of the November 2018 TOP500 list. For each page keep
// the markup between the end of the table header and the end of the table in
// $result[<page>], with root-relative links turned into absolute ones.
// $full_result is the concatenation of all pages in page order.
// A page that cannot be downloaded, or that lacks the expected markers, is
// logged and contributes an empty string instead of garbage offsets.
$result = array();
$start_table = '</thead>';
$end_table = "</table>";

for ($a = 1; $a <= 5; $a++) {
	$htmllist = file_get_contents("https://www.top500.org/list/2018/11/?page=" . $a);
	if ($htmllist === false) {
		error_log("TOP500 list page $a could not be downloaded; skipping it");
		$result[$a] = '';
		continue;
	}

	// Look for the closing tag only after the header marker so that an
	// earlier table on the page cannot yield a negative slice length.
	$start = strpos($htmllist, $start_table);
	$end = ($start === false) ? false : strpos($htmllist, $end_table, $start);
	if ($start === false || $end === false) {
		error_log("TOP500 list page $a does not contain the expected table markup; skipping it");
		$result[$a] = '';
		continue;
	}

	$rows = substr($htmllist, $start, $end - $start);

	// Prefix the host only onto root-relative hrefs ("/system/..."); links
	// that are already absolute or protocol-relative are left untouched.
	$result[$a] = preg_replace('/<a href="(?=\/(?!\/))/', '<a href="https://www.top500.org', $rows);
}

$full_result = implode('', $result);

// Collect the absolute URL of every system detail page linked from the list
// markup in $full_result. The capture stops at the first double quote, so
// anything after the href on the same line is never swallowed, and the dots
// of the host name are matched literally.
preg_match_all('/(https:\/\/www\.top500\.org\/system\/[^"]*)"/i', $full_result, $match);

// Plain copy (no reference needed), de-duplicated and re-indexed from 0 so
// each system is visited once.
$systemorg = isset($match[1]) ? array_values(array_unique($match[1])) : array();
	

	// Visit every system detail page listed in $systemorg, extract the
	// specification fields from its summary table and upsert one row per
	// system into the `systemorg2` table.
	//
	// Differences from the previous revision:
	//  - iterates over the URLs actually found instead of a fixed 0..500
	//    counter, which ran past the end of the list;
	//  - `mathlibrary` is now refreshed on duplicate keys (it was missing from
	//    the UPDATE clause while `operatingsystem` was listed twice);
	//  - the statement is prepared once, outside the loop;
	//  - failed downloads, unexpected markup, missing cells and SQL errors are
	//    logged and the affected page/field is skipped instead of aborting.
	$conn = new PDO("mysql:host=$servername;dbname=$dbname", $user, $pass);
	// Report SQL failures as exceptions so they can be logged per row below.
	$conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

	// Single source of truth for the column list: the INSERT columns, the
	// placeholders and the UPDATE assignments are all generated from it, so
	// they cannot drift apart.
	$columns = array(
		'namesite',
		'system',
		'systemurl',
		'manufacturer',
		'cores',
		'memory',
		'processor',
		'interconnect',
		'linkpackperformance',
		'theoreticalpeak',
		'nmax',
		'hpcg',
		'power',
		'powermeasurementlevel',
		'measuredcores',
		'operatingsystem',
		'compiler',
		'mathlibrary',
		'mpi',
		'urlsystem',
	);

	$quotedColumns = array();
	$placeholders = array();
	$assignments = array();
	foreach ($columns as $column) {
		$quotedColumns[] = "`$column`";
		$placeholders[] = ":$column";
		$assignments[] = "`$column` = :$column";
	}

	// Each named placeholder appears twice (VALUES and UPDATE), exactly as in
	// the original query; this relies on PDO's emulated prepares, which is the
	// pdo_mysql default.
	$pdoQuery = 'INSERT INTO `systemorg2` (' . implode(', ', $quotedColumns) . ')'
		. ' VALUES (' . implode(', ', $placeholders) . ')'
		. ' ON DUPLICATE KEY UPDATE ' . implode(', ', $assignments);
	$pdoResult = $conn->prepare($pdoQuery);

	// Markers delimiting the interesting part of a detail page.
	$start_table = "<ul class=\"breadcrumb\">";
	$end_table = "</table>";

	// pattern => replacement pairs that tag each value cell of the summary
	// table with a CSS class so it can be selected by name afterwards. They
	// are applied in this order.
	// NOTE(review): the `[^s]` classes in the Memory/Power patterns exclude the
	// letter "s", not whitespace; they look like a typo for `\s` but also let
	// the match span line breaks, so they are deliberately left unchanged —
	// confirm against live markup before touching them.
	$cellRewrites = array(
		"/<th>Site:<\/th>\s+<td><a.+href=\"(.*?)\">(.*?)<\/a><\/td>/" => "<th>Site:</th><td class=\"site\"><a href=\"$1\">$2</a></td>",
		"/<th>System URL:<\/th>\s+<td><a.+href=\"(.*?)\">(.*?)<\/a><\/td>/" => "<th>System URL:</th><td class=\"systemurl\"><a href=\"$1\">$2</a></td>",
		"/<th>Manufacturer:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Manufacturer:</th><td class=\"manufacturer\">$1</td>",
		"/<th>Cores:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Cores:</th><td class=\"cores\">$1</td>",
		"/<th>Memory:<\/th>\s+<td>([^s]+.*?[^s]+)<\/td>/" => "<th>Memory:</th><td class=\"memory\">$1</td>",
		"/<th>Processor:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Processor:</th><td class=\"processor\">$1</td>",
		"/<th>Interconnect:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Interconnect:</th><td class=\"interconnect\">$1</td>",
		"/<th>Linpack Performance \(Rmax\)<\/th>\s+<td>(.*?)<\/td>/" => "<th>Linpack Performance (Rmax)</th><td class=\"linkpackperformance\">$1</td>",
		"/<th>Theoretical Peak \(Rpeak\)<\/th>\s+<td>(.*?)<\/td>/" => "<th>Theoretical Peak (Rpeak)</th><td class=\"theoreticalpeak\">$1</td>",
		"/<th>Nmax<\/th>\s+<td>(.*?)<\/td>/" => "<th>Nmax</th><td class=\"nmax\">$1</td>",
		"/<th>HPCG.*?<\/th>\s+<td>(.*?)<\/td>/" => "<th>HPCG [TFlop/s]</th><td class=\"hpcg\">$1</td>",
		"/<th>Power:<\/th>\s+<td>([^s]+.*?[^s]+)<\/td>/" => "<th>Power:</th><td class=\"power\">$1</td>",
		"/<th>Power Measurement Level:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Power Measurement Level:</th><td class=\"powermeasurementlevel\">$1</td>",
		"/<th>Measured Cores:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Measured Cores:</th><td class=\"measuredcores\">$1</td>",
		"/<th>MPI:<\/th>\s+<td>(.*?)<\/td>/" => "<th>MPI:</th><td class=\"mpi\">$1</td>",
		"/<th>Math Library:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Math Library:</th><td class=\"mathlibrary\">$1</td>",
		"/<th>Compiler:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Compiler:</th><td class=\"compiler\">$1</td>",
		"/<th>Operating System:<\/th>\s+<td>(.*?)<\/td>/" => "<th>Operating System:</th><td class=\"operatingsystem\">$1</td>",
	);

	// Plain text of the first element matching $selector, or null when the
	// page has no such element.
	$cellText = function ($dom, $selector) {
		$node = $dom->find($selector, 0);
		return $node ? $node->plaintext : null;
	};

	// Removes every string in $needles from $value; null stays null.
	$strip = function ($value, $needles) {
		return $value === null ? null : str_replace($needles, '', $value);
	};

	// Collapses runs of whitespace to one space, then trims spaces, tabs and
	// dots from both ends; null stays null.
	$squash = function ($value) {
		return $value === null ? null : trim(preg_replace("/\s{2,}/", " ", $value), " \t.");
	};

	foreach ($systemorg as $urlsystem) {
		// Fixed pause before every request, kept from the original script
		// (presumably to avoid hammering the remote site).
		sleep(5);

		$html2 = file_get_contents($urlsystem);
		if ($html2 === false) {
			error_log("Could not download $urlsystem; skipping it");
			continue;
		}

		// Keep only the part from the breadcrumb up to the end of the first
		// table that follows it.
		$start = strpos($html2, $start_table);
		$end = ($start === false) ? false : strpos($html2, $end_table, $start);
		if ($start === false || $end === false) {
			error_log("Unexpected markup on $urlsystem; skipping it");
			continue;
		}

		$result = preg_replace(
			array_keys($cellRewrites),
			array_values($cellRewrites),
			substr($html2, $start, $end - $start)
		);

		$html = is_string($result) ? str_get_html($result) : false;
		if (!$html) {
			error_log("Could not parse $urlsystem; skipping it");
			continue;
		}

		// The system name keeps its original normalisation order (trim first,
		// then collapse whitespace) so stored names stay identical to rows
		// written by earlier runs.
		$system = $cellText($html, 'h1');
		if ($system !== null) {
			$system = preg_replace("/\s{2,}/", " ", trim($system, " \t."));
		}

		$siteCell = $html->find('td.site', 0);
		$siteLink = $siteCell ? $siteCell->find('a', 0) : null;

		$row = array(
			'namesite' => $siteLink ? $siteLink->plaintext : null,
			'system' => $system,
			'systemurl' => $cellText($html, 'td.systemurl'),
			'manufacturer' => $cellText($html, 'td.manufacturer'),
			'cores' => $strip($cellText($html, 'td.cores'), ','),
			'memory' => $strip($squash($cellText($html, 'td.memory')), array(' GB', ',')),
			'processor' => $cellText($html, 'td.processor'),
			'interconnect' => $cellText($html, 'td.interconnect'),
			'linkpackperformance' => $strip($cellText($html, 'td.linkpackperformance'), array(' TFlop/s', ',')),
			'theoreticalpeak' => $strip($cellText($html, 'td.theoreticalpeak'), array(' TFlop/s', ',')),
			'nmax' => $strip($cellText($html, 'td.nmax'), ','),
			'hpcg' => $strip($cellText($html, 'td.hpcg'), ','),
			// Whitespace is collapsed before trimming, the same way as for
			// memory, so no leading blank survives in the stored number.
			'power' => $strip($squash($cellText($html, 'td.power')), array(' kW (Submitted)', ',')),
			'powermeasurementlevel' => $cellText($html, 'td.powermeasurementlevel'),
			'measuredcores' => $strip($cellText($html, 'td.measuredcores'), ','),
			'operatingsystem' => $cellText($html, 'td.operatingsystem'),
			'compiler' => $cellText($html, 'td.compiler'),
			'mathlibrary' => $cellText($html, 'td.mathlibrary'),
			'mpi' => $cellText($html, 'td.mpi'),
			'urlsystem' => $urlsystem,
		);

		// Release the parsed DOM before the next page is loaded; all values
		// have already been copied out as strings.
		$html->clear();
		unset($html);

		$params = array();
		foreach ($columns as $column) {
			$params[":$column"] = $row[$column];
		}

		try {
			$pdoExec = $pdoResult->execute($params);
		} catch (PDOException $e) {
			// One bad row must not stop the remaining pages from being stored.
			error_log("Could not store $urlsystem: " . $e->getMessage());
		}
	}
	

?>
Соседние файлы в папке parser-php