🎉 First commit
This commit is contained in:
commit
111efb3ddb
591
HTML-To-Markdown.php
Normal file
591
HTML-To-Markdown.php
Normal file
@ -0,0 +1,591 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Class HTML_To_Markdown
|
||||||
|
*
|
||||||
|
* A helper class to convert HTML to Markdown.
|
||||||
|
*
|
||||||
|
* @version 2.1.1
|
||||||
|
* @author Nick Cernis <nick@cern.is>
|
||||||
|
* @link https://github.com/nickcernis/html2markdown/ Latest version on GitHub.
|
||||||
|
* @link http://twitter.com/nickcernis Nick on twitter.
|
||||||
|
* @license http://www.opensource.org/licenses/mit-license.php MIT
|
||||||
|
*/
|
||||||
|
class HTML_To_Markdown {
|
||||||
|
/**
|
||||||
|
* @var DOMDocument The root of the document tree that holds our HTML.
|
||||||
|
*/
|
||||||
|
private $document;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var string|boolean The Markdown version of the original HTML, or false if conversion failed
|
||||||
|
*/
|
||||||
|
private $output;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var array Class-wide options users can override.
|
||||||
|
*/
|
||||||
|
private $options = array(
|
||||||
|
'header_style' => 'atx', // Set to "atx" to output H1 and H2 headers as # Header1 and ## Header2
|
||||||
|
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
|
||||||
|
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
|
||||||
|
'bold_style' => '**', // Set to '__' if you prefer the underlined style
|
||||||
|
'italic_style' => '*', // Set to '_' if you prefer the underlined style
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
*
|
||||||
|
* Set up a new DOMDocument from the supplied HTML, convert it to Markdown, and store it in $this->$output.
|
||||||
|
*
|
||||||
|
* @param string $html The HTML to convert to Markdown.
|
||||||
|
* @param array $overrides [optional] List of style and error display overrides.
|
||||||
|
*/
|
||||||
|
public function __construct( $html = null, $overrides = null ) {
|
||||||
|
if ( $overrides ) {
|
||||||
|
$this->options = array_merge( $this->options, $overrides );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $html ) {
|
||||||
|
$this->convert( $html );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter for conversion options
|
||||||
|
*
|
||||||
|
* @param $name
|
||||||
|
* @param $value
|
||||||
|
*/
|
||||||
|
public function set_option( $name, $value ) {
|
||||||
|
$this->options[$name] = $value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert
|
||||||
|
*
|
||||||
|
* Loads HTML and passes to get_markdown()
|
||||||
|
*
|
||||||
|
* @param $html
|
||||||
|
* @return string The Markdown version of the html
|
||||||
|
*/
|
||||||
|
public function convert( $html )
|
||||||
|
{
|
||||||
|
$html = preg_replace( '~>\s+<~', '><', $html ); // Strip white space between tags to prevent creation of empty #text nodes
|
||||||
|
|
||||||
|
$this->document = new DOMDocument();
|
||||||
|
|
||||||
|
if ( $this->options['suppress_errors'] ) {
|
||||||
|
libxml_use_internal_errors( true ); // Suppress conversion errors (from http://bit.ly/pCCRSX )
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->document->loadHTML( '<?xml encoding="UTF-8">' . $html ); // Hack to load utf-8 HTML (from http://bit.ly/pVDyCt )
|
||||||
|
$this->document->encoding = 'UTF-8';
|
||||||
|
|
||||||
|
if ( $this->options['suppress_errors'] ) {
|
||||||
|
libxml_clear_errors();
|
||||||
|
}
|
||||||
|
|
||||||
|
return $this->get_markdown( $html );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is Child Of?
|
||||||
|
*
|
||||||
|
* Is the node a child of the given parent tag?
|
||||||
|
*
|
||||||
|
* @param $parent_name string The name of the parent node to search for (e.g. 'code')
|
||||||
|
* @param $node
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
private static function is_child_of( $parent_name, $node ) {
|
||||||
|
for ( $p = $node->parentNode; $p != false; $p = $p->parentNode ) {
|
||||||
|
if ( is_null( $p ) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $p->nodeName == $parent_name ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Children
|
||||||
|
*
|
||||||
|
* Recursive function to drill into the DOM and convert each node into Markdown from the inside out.
|
||||||
|
*
|
||||||
|
* Finds children of each node and convert those to #text nodes containing their Markdown equivalent,
|
||||||
|
* starting with the innermost element and working up to the outermost element.
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
*/
|
||||||
|
private function convert_children( $node ) {
|
||||||
|
// Don't convert HTML code inside <code> blocks to Markdown - that should stay as HTML
|
||||||
|
if ( self::is_child_of( 'code', $node ) ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the node has children, convert those to Markdown first
|
||||||
|
if ( $node->hasChildNodes() ) {
|
||||||
|
$length = $node->childNodes->length;
|
||||||
|
|
||||||
|
for ( $i = 0; $i < $length; $i++ ) {
|
||||||
|
$child = $node->childNodes->item( $i );
|
||||||
|
$this->convert_children( $child );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that child nodes have been converted, convert the original node
|
||||||
|
$this->convert_to_markdown( $node );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Markdown
|
||||||
|
*
|
||||||
|
* Sends the body node to convert_children() to change inner nodes to Markdown #text nodes, then saves and
|
||||||
|
* returns the resulting converted document as a string in Markdown format.
|
||||||
|
*
|
||||||
|
* @return string|boolean The converted HTML as Markdown, or false if conversion failed
|
||||||
|
*/
|
||||||
|
private function get_markdown()
|
||||||
|
{
|
||||||
|
// Use the body tag as our root element
|
||||||
|
$body = $this->document->getElementsByTagName( 'body' )->item( 0 );
|
||||||
|
|
||||||
|
// Try the head tag if there's no body tag (e.g. the user's passed a single <script> tag for conversion)
|
||||||
|
if ( ! $body ) {
|
||||||
|
$body = $this->document->getElementsByTagName( 'head' )->item( 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! $body ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert all children of the body element. The DOMDocument stored in $this->doc will
|
||||||
|
// then consist of #text nodes, each containing a Markdown version of the original node
|
||||||
|
// that it replaced.
|
||||||
|
$this->convert_children( $body );
|
||||||
|
|
||||||
|
// Sanitize and return the body contents as a string.
|
||||||
|
$markdown = $this->document->saveHTML(); // stores the DOMDocument as a string
|
||||||
|
$markdown = html_entity_decode( $markdown, ENT_QUOTES, 'UTF-8' );
|
||||||
|
$markdown = html_entity_decode( $markdown, ENT_QUOTES, 'UTF-8' ); // Double decode to cover cases like &nbsp; http://www.php.net/manual/en/function.htmlentities.php#99984
|
||||||
|
$markdown = preg_replace( '/<!DOCTYPE [^>]+>/', '', $markdown ); // Strip doctype declaration
|
||||||
|
$unwanted = array( '<html>', '</html>', '<body>', '</body>', '<head>', '</head>', '<?xml encoding="UTF-8">', '
' );
|
||||||
|
$markdown = str_replace( $unwanted, '', $markdown ); // Strip unwanted tags
|
||||||
|
$markdown = trim( $markdown, '\n\r\0\x0B' );
|
||||||
|
|
||||||
|
$this->output = $markdown;
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert to Markdown
|
||||||
|
*
|
||||||
|
* Converts an individual node into a #text node containing a string of its Markdown equivalent.
|
||||||
|
*
|
||||||
|
* Example: An <h3> node with text content of "Title" becomes a text node with content of "### Title"
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
*/
|
||||||
|
private function convert_to_markdown( $node ) {
|
||||||
|
$tag = $node->nodeName; // the type of element, e.g. h1
|
||||||
|
$value = $node->nodeValue; // the value of that element, e.g. The Title
|
||||||
|
|
||||||
|
switch ( $tag ) {
|
||||||
|
case 'p':
|
||||||
|
case 'pre':
|
||||||
|
$markdown = ( trim( $value ) ) ? rtrim( $value ) . PHP_EOL . PHP_EOL : '';
|
||||||
|
break;
|
||||||
|
case 'h1':
|
||||||
|
case 'h2':
|
||||||
|
$markdown = $this->convert_header( $tag, $node );
|
||||||
|
break;
|
||||||
|
case 'h3':
|
||||||
|
$markdown = '### ' . $value . PHP_EOL . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'h4':
|
||||||
|
$markdown = '#### ' . $value . PHP_EOL . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'h5':
|
||||||
|
$markdown = '##### ' . $value . PHP_EOL . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'h6':
|
||||||
|
$markdown = '###### ' . $value . PHP_EOL . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'em':
|
||||||
|
case 'i':
|
||||||
|
case 'strong':
|
||||||
|
case 'b':
|
||||||
|
$markdown = $this->convert_emphasis( $tag, $value );
|
||||||
|
break;
|
||||||
|
case 'hr':
|
||||||
|
$markdown = '- - - - - -' . PHP_EOL . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'br':
|
||||||
|
$markdown = ' ' . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'blockquote':
|
||||||
|
$markdown = $this->convert_blockquote( $node );
|
||||||
|
break;
|
||||||
|
case 'code':
|
||||||
|
$markdown = $this->convert_code( $node );
|
||||||
|
break;
|
||||||
|
case 'ol':
|
||||||
|
case 'ul':
|
||||||
|
$markdown = $value . PHP_EOL;
|
||||||
|
break;
|
||||||
|
case 'li':
|
||||||
|
$markdown = $this->convert_list( $node );
|
||||||
|
break;
|
||||||
|
case 'img':
|
||||||
|
$markdown = $this->convert_image( $node );
|
||||||
|
break;
|
||||||
|
case 'a':
|
||||||
|
$markdown = $this->convert_anchor( $node );
|
||||||
|
break;
|
||||||
|
case '#text':
|
||||||
|
$markdown = preg_replace( '~\s+~', ' ', $value );
|
||||||
|
break;
|
||||||
|
case '#comment':
|
||||||
|
$markdown = '';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// If strip_tags is false (the default), preserve tags that don't have Markdown equivalents,
|
||||||
|
// such as <span> and #text nodes on their own. C14N() canonicalizes the node to a string.
|
||||||
|
// See: http://www.php.net/manual/en/domnode.c14n.php
|
||||||
|
$markdown = ($this->options['strip_tags']) ? $value : html_entity_decode( $node->C14N() );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a DOM text node containing the Markdown equivalent of the original node
|
||||||
|
$markdown_node = $this->document->createTextNode( $markdown );
|
||||||
|
|
||||||
|
// Replace the old $node e.g. "<h3>Title</h3>" with the new $markdown_node e.g. "### Title"
|
||||||
|
$node->parentNode->replaceChild( $markdown_node, $node );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Header
|
||||||
|
*
|
||||||
|
* Converts h1 and h2 headers to Markdown-style headers in setext style,
|
||||||
|
* matching the number of underscores with the length of the title.
|
||||||
|
*
|
||||||
|
* e.g. Header 1 Header Two
|
||||||
|
* ======== ----------
|
||||||
|
*
|
||||||
|
* Returns atx headers instead if $this->options['header_style'] is "atx"
|
||||||
|
*
|
||||||
|
* e.g. # Header 1 ## Header Two
|
||||||
|
*
|
||||||
|
* @param string $level The header level, including the "h". e.g. h1
|
||||||
|
* @param string $node The node to convert.
|
||||||
|
* @return string The Markdown version of the header.
|
||||||
|
*/
|
||||||
|
private function convert_header( $level, $node ) {
|
||||||
|
$content = $node->nodeValue;
|
||||||
|
|
||||||
|
if ( ! $this->is_child_of( 'blockquote', $node ) && $this->options['header_style'] == 'setext' ) {
|
||||||
|
$length = ( function_exists( 'mb_strlen' ) ) ? mb_strlen( $content, 'utf-8' ) : strlen( $content );
|
||||||
|
$underline = ($level == 'h1' ) ? '=' : '-';
|
||||||
|
$markdown = PHP_EOL . $content . PHP_EOL . str_repeat( $underline, $length ) . PHP_EOL . PHP_EOL; // setext style
|
||||||
|
} else {
|
||||||
|
$prefix = ( $level == 'h1' ) ? '# ' : '## ';
|
||||||
|
$markdown = PHP_EOL . $prefix . $content . PHP_EOL . PHP_EOL; // atx style
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts inline styles
|
||||||
|
* This function is used to render strong and em tags
|
||||||
|
*
|
||||||
|
* eg <strong>bold text</strong> becomes **bold text** or __bold text__
|
||||||
|
*
|
||||||
|
* @param string $tag
|
||||||
|
* @param string $value
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_emphasis( $tag, $value ) {
|
||||||
|
if ( $tag == 'i' || $tag == 'em' ) {
|
||||||
|
$markdown = $this->options['italic_style'] . $value . $this->options['italic_style'];
|
||||||
|
} else {
|
||||||
|
$markdown = $this->options['bold_style'] . $value . $this->options['bold_style'];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Image
|
||||||
|
*
|
||||||
|
* Converts <img /> tags to Markdown.
|
||||||
|
*
|
||||||
|
* e.g. <img src="/path/img.jpg" alt="alt text" title="Title" />
|
||||||
|
* becomes ![alt text](/path/img.jpg "Title")
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_image( $node ) {
|
||||||
|
$src = $node->getAttribute( 'src' );
|
||||||
|
$alt = $node->getAttribute( 'alt' );
|
||||||
|
$title = $node->getAttribute( 'title' );
|
||||||
|
|
||||||
|
if ( $title != '' ) {
|
||||||
|
$markdown = '![' . $alt . '](' . $src . ' "' . $title . '")'; // No newlines added. <img> should be in a block-level element.
|
||||||
|
} else {
|
||||||
|
$markdown = '![' . $alt . '](' . $src . ')';
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Anchor
|
||||||
|
*
|
||||||
|
* Converts <a> tags to Markdown.
|
||||||
|
*
|
||||||
|
* e.g. <a href="http://modernnerd.net" title="Title">Modern Nerd</a>
|
||||||
|
* becomes [Modern Nerd](http://modernnerd.net "Title")
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_anchor( $node ) {
|
||||||
|
$href = $node->getAttribute( 'href' );
|
||||||
|
$title = $node->getAttribute( 'title' );
|
||||||
|
$text = $node->nodeValue;
|
||||||
|
|
||||||
|
if ( $title != '' ) {
|
||||||
|
$markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
|
||||||
|
} else {
|
||||||
|
$markdown = '[' . $text . '](' . $href . ')';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append a space if the node after this one is also an anchor
|
||||||
|
$next_node_name = $this->get_next_node_name( $node );
|
||||||
|
|
||||||
|
if ( $next_node_name == 'a' ) {
|
||||||
|
$markdown = $markdown . ' ';
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert List
|
||||||
|
*
|
||||||
|
* Converts <ul> and <ol> lists to Markdown.
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_list( $node ) {
|
||||||
|
// If parent is an ol, use numbers, otherwise, use dashes
|
||||||
|
$list_type = $node->parentNode->nodeName;
|
||||||
|
$value = $node->nodeValue;
|
||||||
|
|
||||||
|
if ( $list_type == 'ul' ) {
|
||||||
|
$markdown = '- ' . trim( $value ) . PHP_EOL;
|
||||||
|
} else {
|
||||||
|
$number = $this->get_position( $node );
|
||||||
|
$markdown = $number . '. ' . trim( $value ) . PHP_EOL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Code
|
||||||
|
*
|
||||||
|
* Convert code tags by indenting blocks of code and wrapping single lines in backticks.
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_code( $node ) {
|
||||||
|
// Store the content of the code block in an array, one entry for each line
|
||||||
|
|
||||||
|
$markdown = '';
|
||||||
|
|
||||||
|
$code_content = html_entity_decode( $node->C14N() );
|
||||||
|
$code_content = str_replace( array( '<code>', '</code>'), '', $code_content );
|
||||||
|
|
||||||
|
$lines = preg_split( '/\r\n|\r|\n/', $code_content );
|
||||||
|
$total = count( $lines );
|
||||||
|
|
||||||
|
// If there's more than one line of code, prepend each line with four spaces and no backticks.
|
||||||
|
if ( $total > 1 ) {
|
||||||
|
|
||||||
|
// Remove the first and last line if they're empty
|
||||||
|
$first_line = trim( $lines[0] );
|
||||||
|
$last_line = trim( $lines[$total - 1] );
|
||||||
|
$first_line = trim( $first_line, '
' ); //trim XML style carriage returns too
|
||||||
|
$last_line = trim( $last_line, '
' );
|
||||||
|
|
||||||
|
if ( empty( $first_line ) ) {
|
||||||
|
array_shift( $lines );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( empty( $last_line ) ) {
|
||||||
|
array_pop( $lines );
|
||||||
|
}
|
||||||
|
|
||||||
|
$count = 1;
|
||||||
|
|
||||||
|
// Add this with the backticks
|
||||||
|
$markdown .= '```' . PHP_EOL;
|
||||||
|
|
||||||
|
foreach ( $lines as $line ) {
|
||||||
|
$line = str_replace( '
', '', $line );
|
||||||
|
// Remove the leading tab from the lines (we're using backticks)
|
||||||
|
// $markdown .= ' ' . $line;
|
||||||
|
$markdown .= $line;
|
||||||
|
// Add newlines, except final line of the code
|
||||||
|
if ( $count != $total ) {
|
||||||
|
$markdown .= PHP_EOL;
|
||||||
|
}
|
||||||
|
$count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
$markdown .= '```' . PHP_EOL;
|
||||||
|
|
||||||
|
} else { // There's only one line of code. It's a code span, not a block. Just wrap it with backticks.
|
||||||
|
|
||||||
|
$markdown .= '`' . $lines[0] . '`';
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert blockquote
|
||||||
|
*
|
||||||
|
* Prepend blockquotes with > chars.
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
private function convert_blockquote( $node ) {
|
||||||
|
// Contents should have already been converted to Markdown by this point,
|
||||||
|
// so we just need to add ">" symbols to each line.
|
||||||
|
|
||||||
|
$markdown = '';
|
||||||
|
|
||||||
|
$quote_content = trim( $node->nodeValue );
|
||||||
|
|
||||||
|
$lines = preg_split( '/\r\n|\r|\n/', $quote_content );
|
||||||
|
|
||||||
|
$total_lines = count( $lines );
|
||||||
|
|
||||||
|
foreach ( $lines as $i => $line ) {
|
||||||
|
$markdown .= '> ' . $line . PHP_EOL;
|
||||||
|
if ( $i + 1 == $total_lines ) {
|
||||||
|
$markdown .= PHP_EOL;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $markdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Position
|
||||||
|
*
|
||||||
|
* Returns the numbered position of a node inside its parent
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return int The numbered position of the node, starting at 1.
|
||||||
|
*/
|
||||||
|
private function get_position( $node ) {
|
||||||
|
// Get all of the nodes inside the parent
|
||||||
|
$list_nodes = $node->parentNode->childNodes;
|
||||||
|
$total_nodes = $list_nodes->length;
|
||||||
|
|
||||||
|
$position = 1;
|
||||||
|
|
||||||
|
// Loop through all nodes and find the given $node
|
||||||
|
for ( $a = 0; $a < $total_nodes; $a++ ) {
|
||||||
|
$current_node = $list_nodes->item( $a );
|
||||||
|
|
||||||
|
if ( $current_node->isSameNode( $node ) ) {
|
||||||
|
$position = $a + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $position;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Next Node Name
|
||||||
|
*
|
||||||
|
* Return the name of the node immediately after the passed one.
|
||||||
|
*
|
||||||
|
* @param $node
|
||||||
|
* @return string|null The node name (e.g. 'h1') or null.
|
||||||
|
*/
|
||||||
|
private function get_next_node_name( $node ) {
|
||||||
|
$next_node_name = null;
|
||||||
|
|
||||||
|
$current_position = $this->get_position( $node );
|
||||||
|
$next_node = $node->parentNode->childNodes->item( $current_position );
|
||||||
|
|
||||||
|
if ( $next_node ) {
|
||||||
|
$next_node_name = $next_node->nodeName;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $next_node_name;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To String
|
||||||
|
*
|
||||||
|
* Magic method to return Markdown output when HTML_To_Markdown instance is treated as a string.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public function __toString()
|
||||||
|
{
|
||||||
|
return $this->output();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Output
|
||||||
|
*
|
||||||
|
* Getter for the converted Markdown contents stored in $this->output
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public function output()
|
||||||
|
{
|
||||||
|
if ( ! $this->output ) {
|
||||||
|
return '';
|
||||||
|
} else {
|
||||||
|
return $this->output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
167
export.php
Normal file
167
export.php
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once(dirname(__FILE__) . '/Idno/start.php');
|
||||||
|
|
||||||
|
include 'HTML-To-Markdown.php';
|
||||||
|
|
||||||
|
function init() {
|
||||||
|
$this->garray['data'] = array();
|
||||||
|
$this->garray['data']['posts'] = array();
|
||||||
|
$this->garray['data']['tags'] = array();
|
||||||
|
$this->garray['data']['posts_tags'] = array();
|
||||||
|
$this->garray['data']['users'] = array();
|
||||||
|
}
|
||||||
|
|
||||||
|
function populate_meta() {
|
||||||
|
$this->garray['meta'] = array(
|
||||||
|
'exported_on' => date( 'r' ),
|
||||||
|
'version' => '000',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function tags() {
|
||||||
|
$all_tags = get_tags();
|
||||||
|
|
||||||
|
if ( ! empty( $all_tags ) ) {
|
||||||
|
foreach ( $all_tags as $tag ) {
|
||||||
|
$this->garray['data']['tags'][] = array(
|
||||||
|
'id' => intval( $tag->term_id ),
|
||||||
|
'name' => $tag->name,
|
||||||
|
'slug' => $tag->slug,
|
||||||
|
'description' => $tag->description,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup
|
||||||
|
unset( $all_tags );
|
||||||
|
unset( $tag );
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO modifier ça pour Known !
|
||||||
|
// https://github.com/idno/Known/blob/master/Idno/Common/Entity.php
|
||||||
|
function posts() {
|
||||||
|
|
||||||
|
$posts = Idno\Common\Entity::getFromAll([], [], 99999);
|
||||||
|
$slug_number = 0;
|
||||||
|
$_post_tags = [];
|
||||||
|
|
||||||
|
foreach(posts as $post) {
|
||||||
|
global $post;
|
||||||
|
$posts->the_post();
|
||||||
|
|
||||||
|
$post->post_markdown = new HTML_To_Markdown( apply_filters( 'the_content', $post->post_content ) );
|
||||||
|
|
||||||
|
$tags = get_the_tags();
|
||||||
|
if ( ! empty( $tags ) ) {
|
||||||
|
foreach ( $tags as $tag ) {
|
||||||
|
$_post_tags[] = array(
|
||||||
|
'tag_id' => intval( $tag->term_id ),
|
||||||
|
'post_id' => intval( $post->ID )
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$s = $this->map_status( $post->post_status );
|
||||||
|
|
||||||
|
$image_id = get_post_thumbnail_id( $post->ID );
|
||||||
|
if ( $image_id !== '' ) {
|
||||||
|
$image = wp_get_attachment_image_src( $image_id, 'full' );
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->garray['data']['posts'][] = array(
|
||||||
|
//'id' => intval( $post->ID ),
|
||||||
|
'title' => substr( ( empty( $post->title ) ) ? '(no title)' : $post->post_title, 0, 150 ),
|
||||||
|
'slug' => substr( ( empty( $post->post_name ) ) ? 'temp-slug-' . $slug_number : $post->post_name, 0, 150 ),
|
||||||
|
'markdown' => $post->post_markdown->output(),
|
||||||
|
'html' => apply_filters( 'the_content', $post->post_content ),
|
||||||
|
'image' => ( $image_id ) ? $image[0] : null,
|
||||||
|
'featured' => 0,
|
||||||
|
'page' => ( $post->post_type === 'page' ) ? 1 : 0,
|
||||||
|
'status' => substr( $s, 0, 150 ),
|
||||||
|
'language' => substr( 'en_US', 0, 6 ),
|
||||||
|
'meta_title' => null,
|
||||||
|
'meta_description' => null,
|
||||||
|
'author_id' => $this->_safe_author_id( $post->post_author ),
|
||||||
|
'created_at' => $this->_get_json_date( $post->post_date ),
|
||||||
|
'created_by' => 1,
|
||||||
|
'updated_at' => $this->_get_json_date( $post->post_modified ),
|
||||||
|
'updated_by' => 1,
|
||||||
|
'published_at' => ($s !== 'draft') ? $this->_get_json_date( $post->post_date ) : '',
|
||||||
|
'published_by' => 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
$slug_number += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->garray['data']['posts_tags'] = $_post_tags;
|
||||||
|
|
||||||
|
// cleanup
|
||||||
|
unset( $posts_args );
|
||||||
|
unset( $posts );
|
||||||
|
unset( $slug_number );
|
||||||
|
unset( $_post_tags );
|
||||||
|
unset( $tags );
|
||||||
|
unset( $tag );
|
||||||
|
unset( $s );
|
||||||
|
}
|
||||||
|
|
||||||
|
function populate_data() {
|
||||||
|
if ( $this->garray !== null ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->garray = array();
|
||||||
|
|
||||||
|
// preps the structure
|
||||||
|
init();
|
||||||
|
|
||||||
|
// attaches metadata
|
||||||
|
meta();
|
||||||
|
|
||||||
|
// attaches tags
|
||||||
|
tags();
|
||||||
|
|
||||||
|
// populates posts
|
||||||
|
posts();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends necessary headers and whatnot to allow to download file
|
||||||
|
* @return bin file
|
||||||
|
*/
|
||||||
|
public function download_file() {
|
||||||
|
|
||||||
|
// Ensure the user accessing the function actually has permission to do this
|
||||||
|
if ( ! current_user_can('export') ) {
|
||||||
|
wp_die( "<p>You are not allowed to do that.</p>", 'Permission error' );
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->populate_data();
|
||||||
|
|
||||||
|
$upload_dir = wp_upload_dir();
|
||||||
|
$filedir = $upload_dir['path'];
|
||||||
|
$filename = 'wp2ghost_export_' . time() . '.json';
|
||||||
|
|
||||||
|
if ( ! is_writable( $filedir ) ) {
|
||||||
|
wp_die( "<p>Uploads directory is not writable, can't save the Ghost json file :/</p><p>Generated by the Ghost plugin version {$this->version}.</p>", 'Directory not writable' );
|
||||||
|
}
|
||||||
|
|
||||||
|
$handle = fopen( $filedir . '/' . $filename, 'w' );
|
||||||
|
$content = $this->get_json( $this->garray );
|
||||||
|
fwrite( $handle, $content );
|
||||||
|
fclose( $handle );
|
||||||
|
|
||||||
|
header( 'Content-Description: File Transfer' );
|
||||||
|
header( 'Content-Type: application/octet-stream' );
|
||||||
|
header( 'Content-Disposition: attachment; filename='.$filename );
|
||||||
|
header( 'Content-Transfer-Encoding: binary' );
|
||||||
|
header( 'Expires: 0' );
|
||||||
|
header( 'Cache-Control: must-revalidate' );
|
||||||
|
header( 'Pragma: public' );
|
||||||
|
header( 'Content-Length: ' . filesize( $filedir . '/' . $filename ) );
|
||||||
|
|
||||||
|
flush();
|
||||||
|
readfile( $filedir . '/' . $filename );
|
||||||
|
exit;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user