Recommended:

  • phpclasses.org
  • jsclasses.org
  • jsmag.com
  • siteapps.com
  • View our reviews on Hot Scripts
  • JS Tutorial
  • scripts.com
  • securesignup.com




Recent Comments

Powered by Disqus




Back to articles

Generate sitemap for your site

Here is a function that generates a sitemap by crawling your site and collecting the URLs of its pages.

Note: use it only locally. For example, it takes 3-4 seconds to generate a sitemap for http://webcodingeasy.com when the script runs on the same server, but about 100 seconds when it runs remotely.

<?php
//Remove the execution time limit: the crawl below issues one HTTP request
//per discovered page and can run longer than the default limit allows.
set_time_limit(0);
//Timing reported by the author: placed locally on the server it takes
//about 3-4 seconds to get all urls from http://code-snippets.co.cc,
//but it takes about 100 seconds to get the same urls
//from a remote server.

/**
 * Crawls a site recursively and returns the list of same-site page URLs found.
 *
 * All you have to pass is the domain url address, like
 * http://code-snippets.co.cc; $base, $protocol and $sitemap only carry state
 * between the recursive calls and should be left at their defaults.
 *
 * Known limitation: relative links are resolved against the site base
 * (protocol + base), not against the directory of the page they appear on.
 *
 * @param string        $domain   Start URL (first call) or the page to scan (recursive calls).
 *                                "http://" is prepended when no http/https scheme is given.
 * @param string        $base     Host (plus optional path) without scheme or trailing slash;
 *                                empty on the first call, where it is derived from $domain.
 * @param string        $protocol "http://" or "https://"; derived from $domain on the first call.
 * @param array         $sitemap  URLs collected so far.
 * @param callable|null $fetcher  Optional function($url) returning the page body as a string
 *                                (or false on failure). Defaults to a cURL GET that follows
 *                                redirects.
 *
 * @return array List of absolute URLs, the start URL first, in discovery order.
 */
function sitemap($domain, $base = "", $protocol = "", $sitemap = array(), $fetcher = null)
{
    //if initiated for the first time
    if(trim($base) == "")
    {
        $domain = trim($domain);
        //getting proper domain name and protocol
        if(preg_match('#^https?://#i', $domain, $scheme))
        {
            $protocol = $scheme[0];
        }
        else
        {
            $protocol = "http://";
            $domain = $protocol.$domain;
        }
        //getting base of domain url address: drop the protocol and ALL
        //trailing slashes (the cast covers substr() returning false on old PHP)
        $base = rtrim((string)substr($domain, strlen($protocol)), "/");
        if($base === "")
        {
            //nothing to crawl (empty input or a bare protocol)
            return $sitemap;
        }
        //first link will be link given by user
        $sitemap[] = $domain;
    }

    //requesting link content, using curl unless a custom fetcher was supplied
    if($fetcher !== null)
    {
        $page = call_user_func($fetcher, $domain);
    }
    else
    {
        $curl = curl_init();
        if($curl === false)
        {
            return $sitemap;
        }
        curl_setopt($curl, CURLOPT_URL, $domain);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        $page = curl_exec($curl);
        curl_close($curl);
    }
    //a failed request yields false: there is nothing to parse
    if(!is_string($page) || $page === "")
    {
        return $sitemap;
    }

    //getting all links from href attributes (single- or double-quoted)
    $pattern = '/<a[^>]*href\s*=\s*\'([^\']*)\'|<a[^>]*href\s*=\s*"([^"]*)"/is';
    if(!preg_match_all($pattern, $page, $match))
    {
        return $sitemap;
    }

    //add any other file extensions that you don't want to appear
    $skip = array("css", "js", "ico", "jpg", "png", "jpeg", "swf", "gif");

    for($i = 1; $i < sizeof($match); $i++)
    {
        //walking through links
        foreach($match[$i] as $url)
        {
            //attribute values are HTML-encoded (&amp; etc.): decode before use
            $url = trim(html_entity_decode($url, ENT_QUOTES, "UTF-8"));
            //a fragment addresses a position inside a page, not another page
            $hash = strpos($url, "#");
            if($hash !== false)
            {
                $url = (string)substr($url, 0, $hash);
            }
            if($url === "")
            {
                continue;
            }

            //if it doesn't START with http:// or https://
            if(!preg_match('#^https?://#i', $url))
            {
                if(substr($url, 0, 2) === "//")
                {
                    //protocol-relative link: reuse the protocol of the crawl
                    $url = rtrim($protocol, "/").$url;
                }
                else if(preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url))
                {
                    //mailto:, tel:, javascript:, ftp: ... are not crawlable pages
                    continue;
                }
                else
                {
                    //dropping leading "./" and "../" segments (also a bare "." or "..")
                    $url = preg_replace('#^(?:\.{1,2}(?:/|$))+#', '', $url);
                    //transforming to absolute url
                    $url = $protocol.$base."/".ltrim($url, "/");
                }
            }

            //if new
            if(in_array($url, $sitemap, true))
            {
                continue;
            }

            //filtering on the extension of the path only, so that ".js" does not
            //reject ".jsp" pages or host names such as www.jsmag.com
            $path = parse_url($url, PHP_URL_PATH);
            $extension = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : "";
            if(in_array($extension, $skip, true))
            {
                continue;
            }

            //checking if it is url from our domain: the base must be followed by the
            //end of the url, "/" or "?", so that example.com.evil.org is rejected
            $internal = false;
            foreach(array("http://".$base, "https://".$base) as $prefix)
            {
                $length = strlen($prefix);
                if(strncmp($url, $prefix, $length) === 0)
                {
                    $next = (string)substr($url, $length, 1);
                    if($next === "" || $next === "/" || $next === "?")
                    {
                        $internal = true;
                        break;
                    }
                }
            }
            if(!$internal)
            {
                continue;
            }

            //adding url
            $sitemap[] = $url;
            //and same thing with newly added url
            $sitemap = sitemap($url, $base, $protocol, $sitemap, $fetcher);
        }
    }
    //returning array with sitemap
    return $sitemap;
}
//retrieving the array of urls and printing it as an XML sitemap
//(sitemaps.org 0.9 schema: one <url><loc>...</loc></url> entry per page)
$arr = sitemap("http://code-snippets.co.cc");
header ("content-type: text/xml");
//each line ends with a real newline ("\n"); a literal "n" after the XML
//declaration or between elements would make the document malformed
echo "<?xml version='1.0' encoding='UTF-8'?>\n";
echo "<urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>\n";
foreach($arr as $url)
{
    echo "<url>\n";
    //escaping &, <, > and " so the url is valid XML character data
    echo "<loc>".htmlspecialchars($url)."</loc>\n";
    echo "</url>\n";
}
echo "</urlset>\n";
?>

You may also be interested in:

Powered by BlogAlike.com

blog comments powered by Disqus