Auto-embed link info from URLs in link post format?

Would it be possible to make the link post format work the way links are shared in Facebook, i.e. auto-populate a title, description, and thumbnail?

A plugin for BuddyPress (Buddypress Links, see screenshots) makes this possible for community links… any ideas how I could achieve something similar for link post format items?

2 Answers
2

I have a class I wrote when I was first playing around with the WordPress HTTP API, which does just about what you need:

<?php
/**
 *  WebPage_Info class
 *  
 *  @version 0.1
 *  @author Ohad Raz <[email protected]>
 *  @package WebPage_Info
 *  @copyright Ohad Raz 2011
 *  
 */
if (!class_exists('webpage_info')){
    /**
     *  WebPage_Info class
     *
     *  Loads an HTML page (from a remote URL or from an HTML string) into a
     *  DOMDocument and exposes helpers to read its title, meta description,
     *  meta keywords, and arbitrary elements by tag name or id.
     *
     *  Every accessor returns null when the page could not be loaded, so the
     *  object is always safe to query; check $_found to tell the two apart.
     */
    class webpage_info{

        /**
         * page url (or the raw html string when constructed with $html = true)
         * @var string
         * @access public
         * @since 0.1
         */
        public $_url;
        /**
         * page html
         * @var string
         * @access public
         * @since 0.1
         */
        public $_body;
        /**
         * page title ("" until requested, null when the page has none)
         * @var string
         * @access public
         * @since 0.1
         */
        public $_title;
        /**
         * page meta description ("" until requested, null when the page has none)
         * @var string
         * @access public
         * @since 0.1
         */
        public $_description;
        /**
         * page as domDocument ("" while no document has been loaded)
         * @var domDocument object
         * @access public
         * @since 0.1
         */
        public $_dom;
        /**
         * if page was loaded correctly or not
         * @var bool
         * @access public
         * @since 0.1
         */
        public $_found;
        /**
         * array of page meta keywords (null when the page has none)
         * @var array
         * @access public
         * @since 0.1
         */
        public $_keywords;
        /**
         * any other element which is requested by tag for faster access
         * @var array
         * @access public
         * @since 0.1
         */
        public $_tags;

        /**
         * any other element which is requested by id for faster access
         * @var array
         * @access public
         * @since 0.1
         */
        public $_ids;

        /**
         * output format ("dom", "array" or "html") of each entry cached in $_tags
         * @var array
         * @access private
         */
        private $_tag_formats;

        /**
         * output format ("dom", "array" or "html") of each entry cached in $_ids
         * @var array
         * @access private
         */
        private $_id_formats;

        /**
         * Class constructor
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @param $url string  url to load, or an html string when $html is true
         * @param $html bool   true when $url holds html markup rather than a url
         * 
         * @return Void
         */
        public function __construct($url,$html=false){
            $this->_url = $url;
            $this->_body = "";
            $this->_title = "";
            $this->_description = "";
            $this->_dom = "";
            $this->_found = false;
            // Must start as an array: Keywords() counts it before the first lookup.
            $this->_keywords = array();
            $this->_tags = array();
            $this->_ids = array();
            $this->_tag_formats = array();
            $this->_id_formats = array();
            if ($html)
                $this->get_local_html($url);
            else
                $this->get_remote_html();
        }

        /**
         * get_local_html will load domDocument object from html as string
         * 
         * Sets $_found to false (instead of failing) when $body is empty or
         * cannot be parsed.
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @param  (string) $body html to load
         * 
         * @return Void
         */
        public function get_local_html($body) {
            $this->_found = $this->load_dom($body);
            if ($this->_found)
                $this->_body = $body;
        }

        /**
         * get_remote_html will load remote url into domDocument object
         * 
         * Only a 200 response with a parsable, non-empty body marks the page
         * as found, whichever transport is used.
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @uses wp_remote_get and falls back to curl_exec
         * 
         * @return Void
         */
        public function get_remote_html(){
            if (function_exists('wp_remote_get')){
                $resp = wp_remote_get( $this->_url );
                // wp_remote_get() hands back a WP_Error object on transport
                // failure; only an array response can be indexed.
                if ( is_array($resp)
                    && isset($resp['response']['code'], $resp['body'])
                    && 200 == $resp['response']['code'] ) {
                    $this->_body = $resp['body'];
                    $this->_found = true;
                }
            }elseif (function_exists('curl_init')){
                $curl = curl_init($this->_url);
                if ($curl !== false){
                    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
                    // Follow redirects so the final status is comparable with
                    // the WordPress branch, which follows them by default.
                    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
                    $result = curl_exec($curl);
                    $code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
                    if ($result !== false && 200 == $code){
                        $this->_body = $result;
                        $this->_found = true;
                    }
                    curl_close($curl);
                }
            }
            if ($this->_found){
                // A 200 response with an unusable body still counts as not found.
                $this->_found = $this->load_dom($this->_body);
            }
        }


        /**
         * Title function will return the title of the current page
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @return string|null the first <title> text, null when there is none
         */
        public function Title(){
            if ($this->_title != "")
                return $this->_title;

            $this->_title = null;
            if ($this->has_dom()){
                $t = $this->_dom->getElementsByTagName('title');
                // Take the first <title>; inline SVG can add further ones.
                if ($t->length > 0)
                    $this->_title = $t->item(0)->nodeValue;
            }
            return $this->_title;
        }

        /**
         * Description function will return the description of the current page
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @return string|null content of <meta name="description">, null when absent
         */
        public function Description(){
            if ($this->_description != "")
                return $this->_description;

            $this->_description = $this->get_meta_content('description');
            return $this->_description;
        }


        /**
         * KeyWords function will return an array of the keywords of the current page
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @return array|null trimmed keywords from <meta name="keywords">, null when absent
         */
        public function Keywords(){
            // $_keywords becomes null after an unsuccessful lookup, so it has
            // to be type-checked before it is counted.
            if (is_array($this->_keywords) && count($this->_keywords) > 0)
                return $this->_keywords;

            $content = $this->get_meta_content('keywords');
            if ($content === null){
                $this->_keywords = null;
                return null;
            }
            // Authors usually write "a, b, c"; drop the padding around each keyword.
            $this->_keywords = array_map('trim', explode(",", $content));
            return $this->_keywords;
        }

        /**
         * getEByTagName function to get elements by tag name
         * 
         * Results are cached per tag. A cached entry is reused only when it
         * was produced in the same $output format.
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @param  string  $tag    tagName
         * @param  string  $output dom: domElement object, array: array of tag,html,and attributes as array, html: html string of the element
         * @param  boolean $force  force generation or load from cache
         * 
         * @return mixed depend on what you set output to be, will return null when tag name not found
         */
        public function getEByTagName($tag,$output = "dom",$force = false){
            $format = $this->normalize_output($output);
            $cached = isset($this->_tags[$tag])
                && (!isset($this->_tag_formats[$tag]) || $this->_tag_formats[$tag] === $format);
            if ($cached && !$force)
                return $this->_tags[$tag];

            // getElementsByTagName() never returns null, so "not found" is an
            // empty node list.
            $nodes = $this->has_dom() ? $this->_dom->getElementsByTagName($tag) : null;
            if ($nodes === null || $nodes->length == 0){
                $this->_tags[$tag] = null;
                unset($this->_tag_formats[$tag]);
                return null;
            }

            if ($format == "dom"){
                $result = $nodes;
            }else{
                $result = array();
                foreach ($nodes as $e) {
                    $result[] = ($format == "array")
                        ? $this->element_to_array($e, $tag)
                        : $this->_dom->saveXML($e);
                }
            }
            $this->_tags[$tag] = $result;
            $this->_tag_formats[$tag] = $format;
            return $result;
        }

        /**
         * getEByID function to get elements by id
         * 
         * Results are cached per id. A cached entry is reused only when it
         * was produced in the same $output format.
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @param  string  $id    Element id to fetch
         * @param  string  $output dom: domElement object, array: array of tag,html,and attributes as array, html: html string of the element
         * @param  boolean $force  force generation or load from cache
         * 
         * @return mixed depend on what you set output to be, will return null when id not found
         */
        public function getEByID($id,$output = "dom",$force = false){
            $format = $this->normalize_output($output);
            $cached = isset($this->_ids[$id])
                && (!isset($this->_id_formats[$id]) || $this->_id_formats[$id] === $format);
            if ($cached && !$force)
                return $this->_ids[$id];

            $element = $this->getElementById($id);
            if ($element === null) {
                $this->_ids[$id] = null;
                unset($this->_id_formats[$id]);
                return null;
            }

            if ($format == "dom"){
                $result = $element;
            }elseif ($format == "array"){
                $result = $this->element_to_array($element, $element->tagName);
            }else{
                $result = $this->_dom->saveXML($element);
            }
            $this->_ids[$id] = $result;
            $this->_id_formats[$id] = $format;
            return $result;
        }

        /**
         * getElementById using XpathDom
         * 
         * @access public
         * @since 0.1
         * @author Ohad Raz <[email protected]>
         * 
         * @param  string $id element id to get
         * 
         * @return DOMElement|null first element carrying that id, null when none matches
         */
        public function getElementById($id){
            if (!$this->has_dom())
                return null;
            $xpath = new DOMXPath($this->_dom);
            // The id is quoted as an XPath literal so quotes inside it cannot
            // break the expression.
            $matches = $xpath->query("//*[@id=" . $this->xpath_literal($id) . "]");
            if ($matches === false)
                return null;
            return $matches->item(0);
        }

        /**
         * Tells whether a document has been loaded into $_dom.
         * 
         * @access private
         * 
         * @return bool
         */
        private function has_dom(){
            return $this->_dom instanceof DOMDocument;
        }

        /**
         * Parses an html string into $_dom.
         * 
         * @access private
         * 
         * @param  string $html markup to parse
         * 
         * @return bool true when a document was loaded, false for empty or unparsable input
         */
        private function load_dom($html){
            // loadHTML() throws a ValueError on an empty string in PHP 8.
            if (!is_string($html) || trim($html) === "")
                return false;

            $dom = new DOMDocument();
            // Real-world markup is rarely valid; collect libxml warnings
            // instead of emitting them, then restore the caller's setting.
            $previous = libxml_use_internal_errors(true);
            $loaded = $dom->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);

            if ($loaded === false)
                return false;
            $this->_dom = $dom;
            return true;
        }

        /**
         * Returns the content attribute of the first <meta> whose name matches.
         * 
         * @access private
         * 
         * @param  string $name lower-case meta name to look for
         * 
         * @return string|null content value, null when no such meta tag exists
         */
        private function get_meta_content($name){
            if (!$this->has_dom())
                return null;
            foreach ($this->_dom->getElementsByTagName('meta') as $meta) {
                if ( strtolower( $meta->getAttribute( 'name' ) ) == $name )
                    return $meta->getAttribute( 'content' );
            }
            return null;
        }

        /**
         * Maps the $output argument of the getters onto "dom", "array" or "html".
         * 
         * @access private
         * 
         * @param  string $output requested output format
         * 
         * @return string "dom", "array", or "html" for anything else
         */
        private function normalize_output($output){
            if ($output == "dom")
                return "dom";
            if ($output == "array")
                return "array";
            return "html";
        }

        /**
         * Builds the "array" representation of an element.
         * 
         * @access private
         * 
         * @param  DOMElement $e   element to describe
         * @param  string     $tag value to report under the 'tag' key
         * 
         * @return array 'tag', 'html', plus 'attributes' when the element has any
         */
        private function element_to_array($e, $tag){
            $info = array('tag' => $tag,'html' => $this->_dom->saveXML($e));
            if ($e->hasAttributes()) {
                foreach ($e->attributes as $attr) {
                    $info['attributes'][$attr->nodeName] = $attr->nodeValue;
                }
            }
            return $info;
        }

        /**
         * Quotes a value as an XPath 1.0 string literal.
         * 
         * XPath 1.0 has no escape character, so a value holding both quote
         * styles is assembled with concat().
         * 
         * @access private
         * 
         * @param  string $value raw value
         * 
         * @return string XPath expression evaluating to $value
         */
        private function xpath_literal($value){
            $value = (string) $value;
            if (strpos($value, "'") === false)
                return "'" . $value . "'";
            if (strpos($value, '"') === false)
                return '"' . $value . '"';
            return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
        }
    }//end class
}//end if

Once you have this class in place, you can use it very simply to do just what you want. For example:

// Usage example: constructing the object with a URL (second argument omitted)
// fetches the page immediately.
$WebPage = new webpage_info('http://en.bainternet.info');
// _found is only true when the page was loaded, so guard every lookup with it.
if ($WebPage->_found){
    // page title: text of the <title> element, or null when there is none
    $title = $WebPage->Title();
    // page description: content of the description <meta> tag, or null when absent
    $description = $WebPage->Description();
    // 'array' output asks for one entry per <img> holding 'tag', 'html' and,
    // when the element has any, its 'attributes' as a nested array
    $imgTags = $WebPage->getEByTagName('img','array');
    // here $imgTags is meant to hold every image on the page; pick a thumbnail
    // from its 'src' attributes
}

So there is very little left for you to do. Just remember that inside WordPress the class uses the HTTP API, and outside WordPress it falls back to PHP cURL.

Leave a Comment