Jump to content

get content of website


fer0an

Recommended Posts

Hello

I want to get the content of a website and insert it into my own website.

This website has 2000 pages, and each page has 10 posts.

I have written some code, but I have a problem:

// Simplified excerpt of the scraper: the outer loop walks the listing pages,
// the inner loop is meant to visit the 10 posts on each page.
// (The excerpt is cut off here; the closing braces are not part of the post.)
for($i=1;$i<=3;$i++)   // $i is the listing page number
{
// Download the listing page
$maincatst = file_get_contents($catlink.$i);  // $catlink is the category URL; the page number is appended to it
// Number of posts on a listing page
for ($j=1;$j<=10;$j++)
{
// TextBetween() returns the text after the FIRST occurrence of its start
// marker, and $maincatst is never modified inside this loop, so every one of
// the 10 iterations extracts the same (first) link.
$linkposttmp = TextBetween('before link','after link',$maincatst) ;

$link = TextBetween('before link','after link>',$linkposttmp);

// Download the content behind the extracted link
$main_post_str = file_get_contents($link);

 

My problem is that when I run this file, it only downloads the first link of each main page.

Can anyone help me?

Link to comment
Share on other sites

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">



<html xmlns="http://www.w3.org/1999/xhtml">

<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Untitled Document</title>
</head>
<?php
// The import downloads many remote pages, so lift the execution time limit.
set_time_limit(0) ;
// Send every echo to the browser right away so progress is visible while
// the script is still running.
ob_implicit_flush(true);
// Close the active output buffer, if any. ob_end_flush() raises a notice
// when no buffer is active, so only call it when one exists.
if (ob_get_level() > 0) {
    ob_end_flush();
}
//get file extension
/**
 * Return the extension of a file name: the text after its last dot.
 *
 * @param string $file_name File name or path to inspect.
 * @return string Extension without the leading dot, or '' when the name
 *                contains no dot or ends with a dot.
 */
function get_file_extension($file_name) 
{
    $dot = strrpos($file_name, '.');
    if ($dot === false) {
        // No dot at all: report "no extension" as an empty string instead of
        // passing strrchr()'s false on to substr().
        return '';
    }
    // The cast keeps the return type a string on PHP versions where substr()
    // returns false for a start offset at the end of the string.
    return (string) substr($file_name, $dot + 1);
}
/**
 * Extract the text that lies between two markers.
 *
 * Markers are matched case-insensitively (everything is lower-cased before
 * searching), but the returned text is cut from the original string, so its
 * case is preserved. Only the first occurrence of $s1 is considered.
 *
 * @param string $s1 Start marker; an empty marker means "from the beginning".
 * @param string $s2 End marker; an empty marker means "up to the end".
 * @param string $s  Text to search in.
 * @return string Text between the markers, or '' when a marker is not found.
 */
function TextBetween($s1,$s2,$s){
    $open     = strtolower($s1);
    $close    = strtolower($s2);
    $haystack = strtolower($s);
    $openLen  = strlen($open);

    // An empty start marker anchors the match at offset 0.
    $start = ($openLen > 0) ? strpos($haystack, $open) : 0;
    if ($start === false) {
        return '';
    }

    $from = $start + $openLen;
    if ($close == '') {
        return substr($s, $from);
    }

    // Offset of the end marker relative to $from equals the result length.
    $length = strpos(substr($haystack, $from), $close);
    if ($length === false) {
        return '';
    }

    return substr($s, $from, $length);
}








// Database credentials and target schema.
$dbhost = "localhost";
$dbname = "database";
$dbuser = "user";
$dbpass = "pass";

// Open the connection; stop the script with the server's error message when
// it cannot be established.
$con = mysql_connect($dbhost, $dbuser, $dbpass);
if (!$con) {
    die('Could not connect: ' . mysql_error());
}

// Select the working database for all following queries.
if (!mysql_select_db($dbname)) {
    die('Could not select database');
}











// Categories to import, one entry per category, encoded as
// [name]category title[/name][link]listing URL without the page number[/link].
// The import loop appends the page number to each link.
$catstring = '




[name]Arts and Photography[/name][link]http://www.website.com/arts-photography/page/[/link]

[name]Outdoors and Nature[/name][link]http://www.website.com/outdoors-nature/page/[/link]';

// Table that receives the imported posts. IF NOT EXISTS makes the statement
// succeed on every run, not only on the first one (a plain CREATE TABLE fails
// as soon as the table is already there).
    $query_table ="CREATE TABLE IF NOT EXISTS `second_content` (



`name` VARCHAR( 255 ) NOT NULL ,



`text` TEXT NOT NULL ,



`dllink` VARCHAR( 500 ) NOT NULL ,



`size` VARCHAR( 20 ) NOT NULL ,



`changed` TINYINT( 1 ) NOT NULL DEFAULT '0' ,



`fileext` VARCHAR( 5 ) NOT NULL ,



`cat` VARCHAR (50) NOT NULL , 

`dblink` VARCHAR( 500 ) NOT NULL 

) ;";

// Print "yes1" when the table is available, otherwise "no1" followed by the
// reason reported by the server.
    $result_table = mysql_query($query_table);
    if ($result_table) {
        echo "yes1";
    } else {
        echo "no1";
        echo " (" . mysql_error() . ")";
    }



    



    



  while($catlink = TextBetween('[link]','[/link]',$catstring))



   {



    //dar avardane link va cat name  



    $catname = TextBetween('[name]','[/name]',$catstring);



    $catstring = str_replace("[link]".$catlink."[/link]","",$catstring);



    $catstring = str_replace("[name]".$catname."[/name]","",$catstring);



    echo "<b>".$catname."</b><br><b>".$catlink."</b><br>".$catstring."<br>" ;
    
    
     //tedad safahate category
     $i=1;
     while ($i<=4)
     {
     echo "<font color=\"blue\">".$i."</font>"; 


//download e safe asli
$maincatst = file_get_contents($catlink.$i);
//echo $maincatst;
//tedad post dar har safhe

$i++;
{
$linkposttmp = TextBetween('dle-info','footer',$maincatst) ;
//echo $linkposttmp;

$link = TextBetween('shortnews-header"><a href="','">',$linkposttmp);

echo $link."<br>";

// download content url haye safe farei
$main_post_str = file_get_contents($link);



//title
            $title = TextBetween('fullnews-header">','</div>',$main_post_str);

//alias
                  $alias1 = str_replace(" ","-",$title);
          $pieces = explode("-", $alias1);
          $alias = $pieces[0]."-".$pieces[1]."-".$pieces[2]."-".$pieces[3]."-".$pieces[4]."-".$pieces[5];

             
//full text            

                        $text0= TextBetween('<div id="news-id-','"fullnews-info-bottom">',$main_post_str);
                        //echo $text0;
                        $text = TextBetween('<b>','</div>',$text0);
                        $text ="<b>".$text;
//   echo $text;            

    
//images names
$imagename = str_replace(" ","",$title);
$imagename = str_replace("#","_",$imagename);
      $imagename = str_replace("\\","_",$imagename);
      $imagename = str_replace("/","_",$imagename);
//images address
$images1 = textbetween('<div id="news-id-','/></a>',$main_post_str);
$images1 = textbetween('src="','"',$images1);

//save images
$im = imagecreatefromjpeg($images1); // original image

//reduce image quality
imagejpeg($im, "/home/site/public_html/static/images/book/".$imagename.".jpg" , 60); // save to new image, third value is quality (0-100) if not specified its the default (75)
$size = getimagesize($images1);
$dimention = $size[3];

$images = '<img src="http://www.website.com/static/images/book/'.$imagename.'.jpg" '.$dimention.'';
$images = $images.' align="right">';
$images = str_replace ('<img src=""','<img src="http://www.website.com/images/noimage.jpg"',$images);
//echo $images;
//if picture exist
if (@fclose(@fopen( "$images1", "r" ))) {
print("File exists.");
} else {
print("File does not exist.");
} 


//full text

                        $full = '<p align="left">'.$images.'</p></br>'.$text;
                  //      echo $full;


//DL link 
$finddl = textbetween ("<div id='comm-id-",'</noindex>',$main_post_str);
$finddl1 = textbetween ('<noindex><a href="','"',$finddl);
$size = textbetween ('Format','<noindex>',$finddl);
$size = textbetween ('Size:','</b>',$size);

//echo $size."<br/>";
//echo $finddl1."<br/>";


//extension

            if( strpos(finddl1,".zip") ) { $ext = ".zip";} else $ext = ".rar";
            




//pak kardane url ghabli
$maincatst = str_replace('shortnews-header"><a href="'.$linkposttmp.'">', "",$maincatst);


//continue if exist bar hasbe title
$query_se = mysql_query(" select `name` from `second_content` where `name`='$title' " );
if (!$query_se)
  {
  die('Error: exist ' . mysql_error());
  }
$title_numrows = mysql_num_rows($query_se);
$nat_se = mysql_fetch_array($query_se);

$title_tm = $nat_se["name"];

echo "titletm= $title_tm ";

if ($title_numrows>0 )

{ 
echo "$title :found<br>";
continue ;
}
else 
{
echo "<font color='red' >$title :$title_tm found</font><br>";
print_r($nat_se);
}





            //rikhtane info dar db


        
$query_insert = "INSERT INTO `second_content` (`name`, `text`, `dllink`, `size`, `changed`, `fileext`, `cat`, `dblink`)
VALUES ('".$title."', '".$full."', '".$finddl1."', '".$size."', '0', '".$ext."', '".$catname."', '".$dblink."');";

if($result_insert =mysql_query($query_insert)) {echo "yes2".$title."<br>";} else echo "<font color='red' ><b>no2</b></font>".$title;


$query_se = "SELECT * FROM `first_content` WHERE `name`='".$title."' ";



$result_se = mysql_query($query_se);

$title_numrows = mysql_num_rows($result_se);

$nat_se = mysql_fetch_array($result_se);



$title_tm = $nat_se["name"];



echo "titletm= $title_tm ";



if ($title_numrows>0 ) { echo "$title :found<br>"; continue ;}
else
{
echo "<font color='red' >$title :$title_tm found</font>";
print_r($nat_se);
}










if($result_insert =mysql_query($query_insert)) {echo "yes2".$title."<br>";} else echo "no2".$title;



}



}



        }





     echo "<br>salam".$i;



    



     $strp_no = strpos($maincatst,"No articles found" );



    while( $strp_no == FALSE);



    echo "<br>salam<br>";




}



?>

</html>

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.