Commit initial

This commit is contained in:
Clement Desmidt 2016-10-06 12:19:58 +02:00
commit ff2f296585
40 changed files with 3148 additions and 0 deletions

20
composer.json Normal file
View File

@ -0,0 +1,20 @@
{
"name": "shikiryu/tumblr2shaarli",
"description": "Copy the given tumblr to your shaarli",
"version": "0.1.0",
"keywords": ["tumblr", "shaarli"],
"homepage": "https://shikiryu.com",
"time": "2016-10-06",
"license": "MIT",
"authors": [
{
"name": "Shikiryu",
"email": "pro@shikiryu.com",
"homepage": "https://shikiryu.com",
"role": "Developer"
}
],
"require": {
"league/html-to-markdown": "^4.2"
}
}

211
import.php Normal file
View File

@ -0,0 +1,211 @@
<?php
/**
 * Report a progress message.
 *
 * The message is printed to the output followed by an HTML line break,
 * then appended (one message per line) to import.log, which sits in the
 * same directory as this script.
 *
 * @param string $message Text to report
 */
function mylog($message) {
    $logPath = sprintf('%s/import.log', __DIR__);
    echo $message, '<br>', "\n";
    file_put_contents($logPath, $message . "\n", FILE_APPEND);
}
mylog('start');
// Pulling config from the tumblr.ini file located next to this script
$ini_file = sprintf('%s/tumblr.ini', dirname(__FILE__));
if (!is_readable($ini_file)) {
    die('You must have a tumblr.ini file');
}
$config = parse_ini_file($ini_file);
// parse_ini_file() returns false on a syntax error; stop here instead of
// continuing with an unusable configuration.
if ($config === false) {
    die('The tumblr.ini file could not be parsed');
}
// The blog name and the API key are both embedded in every API URL,
// so the import cannot work without them.
foreach (['tumblr', 'api_key'] as $required_key) {
    if (!isset($config[$required_key]) || $config[$required_key] === '') {
        die(sprintf('Missing "%s" in the tumblr.ini file', $required_key));
    }
}
$tumblr_blog = $config['tumblr'];
$api_key = $config['api_key'];
// Optional keys: links are public and Shaarli lives in the script directory
// when nothing is configured.
$private = isset($config['private']) ? $config['private'] : false;
// shaarli_dir is relative to this script. It is used as a raw prefix for the
// Shaarli "application/..." files, so a non-empty value must end with "/".
$shaarli_dir = sprintf('%s/%s', dirname(__FILE__), isset($config['shaarli_dir']) ? $config['shaarli_dir'] : '');
// Loading every needed class
require 'vendor/autoload.php';
// Shaarli library
require_once sprintf('%sapplication/ApplicationUtils.php', $shaarli_dir);
require_once sprintf('%sapplication/Cache.php', $shaarli_dir);
require_once sprintf('%sapplication/CachedPage.php', $shaarli_dir);
require_once sprintf('%sapplication/config/ConfigManager.php', $shaarli_dir);
require_once sprintf('%sapplication/config/ConfigPlugin.php', $shaarli_dir);
require_once sprintf('%sapplication/FeedBuilder.php', $shaarli_dir);
require_once sprintf('%sapplication/FileUtils.php', $shaarli_dir);
require_once sprintf('%sapplication/HttpUtils.php', $shaarli_dir);
require_once sprintf('%sapplication/Languages.php', $shaarli_dir);
require_once sprintf('%sapplication/LinkDB.php', $shaarli_dir);
require_once sprintf('%sapplication/LinkFilter.php', $shaarli_dir);
require_once sprintf('%sapplication/LinkUtils.php', $shaarli_dir);
require_once sprintf('%sapplication/NetscapeBookmarkUtils.php', $shaarli_dir);
require_once sprintf('%sapplication/PageBuilder.php', $shaarli_dir);
require_once sprintf('%sapplication/TimeZone.php', $shaarli_dir);
require_once sprintf('%sapplication/Url.php', $shaarli_dir);
require_once sprintf('%sapplication/Utils.php', $shaarli_dir);
require_once sprintf('%sapplication/PluginManager.php', $shaarli_dir);
require_once sprintf('%sapplication/Router.php', $shaarli_dir);
require_once sprintf('%sapplication/Updater.php', $shaarli_dir);
// Script state and handles used by the rest of the import.
// DO NOT TOUCH: the pagination below relies on these exact values.
$per_page = 20;
$offset = 0;
$importCount = 0;
$alreadyCount = 0;
// URL of the first request: a single post is fetched (limit=1, offset 0)
$API_post_url = sprintf(
    'https://api.tumblr.com/v2/blog/%s/posts?api_key=%s&limit=1&offset=%u',
    $tumblr_blog,
    $api_key,
    0
);
// HTML -> Markdown converter applied to every description
$converter = new \League\HTMLToMarkdown\HtmlConverter();
$conf = new ConfigManager();
$pagecache = sprintf('%s/pagecache', $shaarli_dir);
// NOTE(review): the meaning of the boolean/string arguments is defined by
// Shaarli's LinkDB constructor, which is not visible here — confirm there.
$datastore = sprintf('%s/data/datastore.php', $shaarli_dir);
$linkDb = new LinkDB($datastore, true, false, '', true);
mylog('Backup '.$tumblr_blog);
// First request to determine the number of tumblr post to save in Shaarli
$json_response = file_get_contents($API_post_url);
$json_response = json_decode($json_response, true);
mylog('First request result : '.var_export($json_response, true));
// Same guard as in the pagination loop: without it a failed request would
// yield a null total, zero loops and a misleading "successful" empty import.
if (!isset($json_response['response']['total_posts'])) {
    die(sprintf('wrong reponse %s', var_export($json_response, true)));
}
$total_count = (int) $json_response['response']['total_posts'];
mylog('Found '.$total_count.' entries on Tumblr');
// Number of pages needed to walk through every post
$loop = ceil($total_count / $per_page);
mylog('We are going to do '.$loop.' loops');
/**
 * Read an optional field of a Tumblr post.
 *
 * @param array  $post    Decoded post
 * @param string $key     Field name
 * @param mixed  $default Value returned when the field is missing or null
 *
 * @return mixed
 */
function tumblr_field(array $post, $key, $default = '')
{
    return isset($post[$key]) ? $post[$key] : $default;
}

/**
 * Extract the embed HTML from the "player" field of an audio/video post.
 *
 * The field is accepted either as a plain HTML string or as a list of
 * players each carrying an "embed_code" entry (the first one is used).
 *
 * @param array $post Decoded post
 *
 * @return string Embed HTML, or an empty string when none is available
 */
function tumblr_player_embed(array $post)
{
    if (empty($post['player'])) {
        return '';
    }
    if (is_string($post['player'])) {
        return $post['player'];
    }
    if (is_array($post['player']) && isset($post['player'][0]['embed_code'])) {
        return (string) $post['player'][0]['embed_code'];
    }
    return '';
}

/**
 * Convert a decoded Tumblr post into the link array stored in Shaarli.
 *
 * @param mixed $post    Decoded post (one entry of response.posts)
 * @param mixed $private Value copied into the "private" field of the link
 *
 * @return array|null The link (title, url, description, private, linkdate,
 *                    tags as an array), or null when the post type is not
 *                    supported or no URL could be determined
 */
function tumblr_post_to_link($post, $private)
{
    if (!is_array($post)) {
        return null;
    }
    $type = tumblr_field($post, 'type');
    $post_url = tumblr_field($post, 'post_url');
    // Quote, audio and video posts point to their original source when known
    $source_url = empty($post['source_url']) ? $post_url : $post['source_url'];

    switch ($type) {
        case 'text':
        case 'chat':
            $title = tumblr_field($post, 'title');
            $url = $post_url;
            $description = tumblr_field($post, 'body');
            break;
        case 'photo':
            // The first alternative size of the first photo is used as the picture
            $pic_url = isset($post['photos'][0]['alt_sizes'][0]['url'])
                ? $post['photos'][0]['alt_sizes'][0]['url']
                : '';
            $title = tumblr_field($post, 'summary');
            $url = $post_url;
            $description = sprintf('<img src="%s" alt="tumblr" /><p>%s</p>', $pic_url, tumblr_field($post, 'caption'));
            break;
        case 'quote':
            $title = tumblr_field($post, 'text');
            $url = $source_url;
            $description = tumblr_field($post, 'source');
            break;
        case 'link':
            $title = tumblr_field($post, 'title');
            $url = tumblr_field($post, 'url');
            $description = tumblr_field($post, 'description');
            break;
        case 'audio':
        case 'video':
            $title = tumblr_field($post, 'source_title');
            $url = $source_url;
            $description = tumblr_field($post, 'caption').tumblr_player_embed($post);
            break;
        case 'answer':
            $title = tumblr_field($post, 'question');
            $url = $post_url;
            $description = tumblr_field($post, 'answer');
            break;
        default:
            return null;
    }

    if ($url === '') {
        return null;
    }
    $tags = (isset($post['tags']) && is_array($post['tags'])) ? $post['tags'] : [];

    return [
        'title' => $title,
        'url' => $url,
        'description' => $description,
        'private' => $private,
        'linkdate' => tumblr_field($post, 'date'),
        'tags' => array_merge($tags, ['tumblr', 'tumblr_'.$type])
    ];
}

// Tumblr API can fetch 20 posts each time, so we need to paginate.
for ($i = 0; $i < $loop; $i++) {
    mylog('--- LOOP '.($i+1).' ---');
    $API_post_url = sprintf('https://api.tumblr.com/v2/blog/%s/posts?api_key=%s&offset=%u', $tumblr_blog, $api_key, ($per_page*$i));
    $json_response = file_get_contents($API_post_url);
    $json_response = json_decode($json_response, true);
    if (!isset($json_response['response'])) {
        die(sprintf('wrong reponse %s', var_export($json_response,true)));
    }
    $posts = (isset($json_response['response']['posts']) && is_array($json_response['response']['posts']))
        ? $json_response['response']['posts']
        : [];
    foreach ($posts as $post) {
        $newLink = tumblr_post_to_link($post, $private);
        // Unsupported post type or no usable URL: log and skip the post.
        // It is neither imported nor "already there", so the final summary
        // reports it as an error.
        if ($newLink === null) {
            mylog('No URL found for '.var_export($post,true));
            continue;
        }
        $existingLink = $linkDb->getLinkFromUrl($newLink['url']);
        // If the link already exists, we don't do anything (we don't want to break anything)
        if ($existingLink !== false) {
            mylog($newLink['url'].' already exists.');
            $alreadyCount++;
            continue;
        }
        // The date must be parsed before anything is written: it is the key of the link
        $newLinkDate = DateTime::createFromFormat('Y-m-d H:i:s T', $newLink['linkdate']);
        if ($newLinkDate === false) {
            mylog('Invalid date '.var_export($newLink['linkdate'], true).' for '.$newLink['url']);
            continue;
        }
        // no HTML in title
        $newLink['title'] = strip_tags((string) $newLink['title']);
        // description in markdown
        $newLink['description'] = $converter->convert((string) $newLink['description']);
        // tags are string separated with a space
        $newLink['tags'] = implode(' ', $newLink['tags']);
        // Add a new link
        while (!empty($linkDb[$newLinkDate->format(LinkDB::LINK_DATE_FORMAT)])) {
            // Ensure the date/time is not already used
            // - this hack is necessary as the date/time acts as a primary key
            // - apply 1 second increments until an unused index is found
            // See https://github.com/shaarli/Shaarli/issues/351
            $newLinkDate->add(new DateInterval('PT1S'));
        }
        $linkDbDate = $newLinkDate->format(LinkDB::LINK_DATE_FORMAT);
        $newLink['linkdate'] = $linkDbDate;
        $linkDb[$linkDbDate] = $newLink;
        mylog($newLink['url']. ' added!');
        $importCount++;
    }
}
// Done: every post that was neither added nor already present counts as an error
$errorCount = $total_count - $importCount - $alreadyCount;
mylog('On '.$total_count.', added '.$importCount.', ignored '.$alreadyCount.' and '.$errorCount.' errors.');
// Write the link database, passing the Shaarli page cache directory
$linkDb->savedb($pagecache);

4
tumblr.ini.dev Normal file
View File

@ -0,0 +1,4 @@
; Blog identifier inserted into the Tumblr API URL (/v2/blog/<tumblr>/posts)
tumblr = xxx.tumblr.com
; Key sent as the api_key query parameter of every Tumblr API request
api_key = xxx
; Value copied into the "private" field of every imported link
private = true
; Shaarli location, relative to the directory of import.php.
; It is used as a raw prefix ("<shaarli_dir>application/..."), so a
; non-empty value must end with a "/". Empty means the same directory.
shaarli_dir =

7
vendor/autoload.php vendored Normal file
View File

@ -0,0 +1,7 @@
<?php
// autoload.php @generated by Composer
// Generated file: local changes are presumably lost when Composer regenerates it.

// Load the generated initializer from the composer/ directory next to this file
require_once __DIR__ . '/composer' . '/autoload_real.php';

// Hand the value returned by the initializer's getLoader() back to the file that included this one
return ComposerAutoloaderInit8d9e9c7c059559c5a7982ce8b604d744::getLoader();

1
vendor/bin/html-to-markdown vendored Symbolic link
View File

@ -0,0 +1 @@
../league/html-to-markdown/bin/html-to-markdown

413
vendor/composer/ClassLoader.php vendored Normal file
View File

@ -0,0 +1,413 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Autoload;
/**
* ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
*
* $loader = new \Composer\Autoload\ClassLoader();
*
* // register classes with namespaces
* $loader->add('Symfony\Component', __DIR__.'/component');
* $loader->add('Symfony', __DIR__.'/framework');
*
* // activate the autoloader
* $loader->register();
*
* // to enable searching the include path (eg. for PEAR packages)
* $loader->setUseIncludePath(true);
*
* In this example, if you try to use a class in the Symfony\Component
* namespace or one of its children (Symfony\Component\Console for instance),
* the autoloader will first look for the class under the component/
* directory, and it will then fallback to the framework/ directory if not
* found before giving up.
*
* This class is loosely based on the Symfony UniversalClassLoader.
*
* @author Fabien Potencier <fabien@symfony.com>
* @author Jordi Boggiano <j.boggiano@seld.be>
* @see http://www.php-fig.org/psr/psr-0/
* @see http://www.php-fig.org/psr/psr-4/
*/
class ClassLoader
{
    // PSR-4
    // Prefix lengths, indexed by first character of the prefix, then by prefix
    private $prefixLengthsPsr4 = array();
    // Base directories, indexed by prefix
    private $prefixDirsPsr4 = array();
    private $fallbackDirsPsr4 = array();

    // PSR-0
    // Directories, indexed by first character of the prefix, then by prefix
    private $prefixesPsr0 = array();
    private $fallbackDirsPsr0 = array();

    private $useIncludePath = false;
    private $classMap = array();
    private $classMapAuthoritative = false;

    /**
     * Returns every registered PSR-0 prefix with its directories.
     *
     * @return array Directories indexed by prefix
     */
    public function getPrefixes()
    {
        if (!empty($this->prefixesPsr0)) {
            // The buckets are keyed by the first character of the prefix.
            // array_values() drops those string keys: since PHP 8,
            // call_user_func_array() treats string keys as named arguments,
            // which array_merge() rejects.
            return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
        }

        return array();
    }

    /**
     * @return array PSR-4 base directories indexed by prefix
     */
    public function getPrefixesPsr4()
    {
        return $this->prefixDirsPsr4;
    }

    /**
     * @return array PSR-0 fallback directories
     */
    public function getFallbackDirs()
    {
        return $this->fallbackDirsPsr0;
    }

    /**
     * @return array PSR-4 fallback directories
     */
    public function getFallbackDirsPsr4()
    {
        return $this->fallbackDirsPsr4;
    }

    /**
     * @return array Class to filename map
     */
    public function getClassMap()
    {
        return $this->classMap;
    }

    /**
     * @param array $classMap Class to filename map
     */
    public function addClassMap(array $classMap)
    {
        if ($this->classMap) {
            $this->classMap = array_merge($this->classMap, $classMap);
        } else {
            $this->classMap = $classMap;
        }
    }

    /**
     * Registers a set of PSR-0 directories for a given prefix, either
     * appending or prepending to the ones previously set for this prefix.
     *
     * @param string       $prefix  The prefix
     * @param array|string $paths   The PSR-0 root directories
     * @param bool         $prepend Whether to prepend the directories
     */
    public function add($prefix, $paths, $prepend = false)
    {
        if (!$prefix) {
            if ($prepend) {
                $this->fallbackDirsPsr0 = array_merge(
                    (array) $paths,
                    $this->fallbackDirsPsr0
                );
            } else {
                $this->fallbackDirsPsr0 = array_merge(
                    $this->fallbackDirsPsr0,
                    (array) $paths
                );
            }

            return;
        }

        $first = $prefix[0];
        if (!isset($this->prefixesPsr0[$first][$prefix])) {
            $this->prefixesPsr0[$first][$prefix] = (array) $paths;

            return;
        }
        if ($prepend) {
            $this->prefixesPsr0[$first][$prefix] = array_merge(
                (array) $paths,
                $this->prefixesPsr0[$first][$prefix]
            );
        } else {
            $this->prefixesPsr0[$first][$prefix] = array_merge(
                $this->prefixesPsr0[$first][$prefix],
                (array) $paths
            );
        }
    }

    /**
     * Registers a set of PSR-4 directories for a given namespace, either
     * appending or prepending to the ones previously set for this namespace.
     *
     * @param string       $prefix  The prefix/namespace, with trailing '\\'
     * @param array|string $paths   The PSR-4 base directories
     * @param bool         $prepend Whether to prepend the directories
     *
     * @throws \InvalidArgumentException
     */
    public function addPsr4($prefix, $paths, $prepend = false)
    {
        if (!$prefix) {
            // Register directories for the root namespace.
            if ($prepend) {
                $this->fallbackDirsPsr4 = array_merge(
                    (array) $paths,
                    $this->fallbackDirsPsr4
                );
            } else {
                $this->fallbackDirsPsr4 = array_merge(
                    $this->fallbackDirsPsr4,
                    (array) $paths
                );
            }
        } elseif (!isset($this->prefixDirsPsr4[$prefix])) {
            // Register directories for a new namespace.
            $length = strlen($prefix);
            if ('\\' !== $prefix[$length - 1]) {
                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
            }
            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
            $this->prefixDirsPsr4[$prefix] = (array) $paths;
        } elseif ($prepend) {
            // Prepend directories for an already registered namespace.
            $this->prefixDirsPsr4[$prefix] = array_merge(
                (array) $paths,
                $this->prefixDirsPsr4[$prefix]
            );
        } else {
            // Append directories for an already registered namespace.
            $this->prefixDirsPsr4[$prefix] = array_merge(
                $this->prefixDirsPsr4[$prefix],
                (array) $paths
            );
        }
    }

    /**
     * Registers a set of PSR-0 directories for a given prefix,
     * replacing any others previously set for this prefix.
     *
     * @param string       $prefix The prefix
     * @param array|string $paths  The PSR-0 base directories
     */
    public function set($prefix, $paths)
    {
        if (!$prefix) {
            $this->fallbackDirsPsr0 = (array) $paths;
        } else {
            $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
        }
    }

    /**
     * Registers a set of PSR-4 directories for a given namespace,
     * replacing any others previously set for this namespace.
     *
     * @param string       $prefix The prefix/namespace, with trailing '\\'
     * @param array|string $paths  The PSR-4 base directories
     *
     * @throws \InvalidArgumentException
     */
    public function setPsr4($prefix, $paths)
    {
        if (!$prefix) {
            $this->fallbackDirsPsr4 = (array) $paths;
        } else {
            $length = strlen($prefix);
            if ('\\' !== $prefix[$length - 1]) {
                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
            }
            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
            $this->prefixDirsPsr4[$prefix] = (array) $paths;
        }
    }

    /**
     * Turns on searching the include path for class files.
     *
     * @param bool $useIncludePath
     */
    public function setUseIncludePath($useIncludePath)
    {
        $this->useIncludePath = $useIncludePath;
    }

    /**
     * Can be used to check if the autoloader uses the include path to check
     * for classes.
     *
     * @return bool
     */
    public function getUseIncludePath()
    {
        return $this->useIncludePath;
    }

    /**
     * Turns off searching the prefix and fallback directories for classes
     * that have not been registered with the class map.
     *
     * @param bool $classMapAuthoritative
     */
    public function setClassMapAuthoritative($classMapAuthoritative)
    {
        $this->classMapAuthoritative = $classMapAuthoritative;
    }

    /**
     * Should class lookup fail if not found in the current class map?
     *
     * @return bool
     */
    public function isClassMapAuthoritative()
    {
        return $this->classMapAuthoritative;
    }

    /**
     * Registers this instance as an autoloader.
     *
     * @param bool $prepend Whether to prepend the autoloader or not
     */
    public function register($prepend = false)
    {
        spl_autoload_register(array($this, 'loadClass'), true, $prepend);
    }

    /**
     * Unregisters this instance as an autoloader.
     */
    public function unregister()
    {
        spl_autoload_unregister(array($this, 'loadClass'));
    }

    /**
     * Loads the given class or interface.
     *
     * @param  string    $class The name of the class
     * @return bool|null True if loaded, null otherwise
     */
    public function loadClass($class)
    {
        if ($file = $this->findFile($class)) {
            includeFile($file);

            return true;
        }
    }

    /**
     * Finds the path to the file where the class is defined.
     *
     * @param string $class The name of the class
     *
     * @return string|false The path if found, false otherwise
     */
    public function findFile($class)
    {
        // work around for PHP 5.3.0 - 5.3.2 https://bugs.php.net/50731
        if ('\\' == $class[0]) {
            $class = substr($class, 1);
        }

        // class map lookup
        if (isset($this->classMap[$class])) {
            return $this->classMap[$class];
        }
        if ($this->classMapAuthoritative) {
            return false;
        }

        $file = $this->findFileWithExtension($class, '.php');

        // Search for Hack files if we are running on HHVM
        if ($file === null && defined('HHVM_VERSION')) {
            $file = $this->findFileWithExtension($class, '.hh');
        }

        if ($file === null) {
            // Remember that this class does not exist.
            return $this->classMap[$class] = false;
        }

        return $file;
    }

    /**
     * Searches the registered directories for the file of a class, in this
     * order: PSR-4 prefixes, PSR-4 fallback directories, PSR-0 prefixes,
     * PSR-0 fallback directories and finally the include path (when enabled).
     *
     * @param string $class The name of the class, without leading backslash
     * @param string $ext   The file extension to look for, with its dot
     *
     * @return string|null The path if found, null otherwise
     */
    private function findFileWithExtension($class, $ext)
    {
        // PSR-4 lookup
        $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;

        $first = $class[0];
        if (isset($this->prefixLengthsPsr4[$first])) {
            foreach ($this->prefixLengthsPsr4[$first] as $prefix => $length) {
                if (0 === strpos($class, $prefix)) {
                    foreach ($this->prefixDirsPsr4[$prefix] as $dir) {
                        if (file_exists($file = $dir . DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $length))) {
                            return $file;
                        }
                    }
                }
            }
        }

        // PSR-4 fallback dirs
        foreach ($this->fallbackDirsPsr4 as $dir) {
            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
                return $file;
            }
        }

        // PSR-0 lookup
        if (false !== $pos = strrpos($class, '\\')) {
            // namespaced class name
            $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
                . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
        } else {
            // PEAR-like class name
            $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
        }

        if (isset($this->prefixesPsr0[$first])) {
            foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
                if (0 === strpos($class, $prefix)) {
                    foreach ($dirs as $dir) {
                        if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
                            return $file;
                        }
                    }
                }
            }
        }

        // PSR-0 fallback dirs
        foreach ($this->fallbackDirsPsr0 as $dir) {
            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
                return $file;
            }
        }

        // PSR-0 include paths.
        if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
            return $file;
        }
    }
}
/**
 * Scope isolated include.
 *
 * Prevents access to $this/self from included files.
 *
 * The body is deliberately a single statement: any local variable declared
 * here would be visible to the included file.
 *
 * @param string $file Path of the file to include
 */
function includeFile($file)
{
    include $file;
}

433
vendor/composer/LICENSE vendored Normal file
View File

@ -0,0 +1,433 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: Composer
Upstream-Contact: Jordi Boggiano <j.boggiano@seld.be>
Source: https://github.com/composer/composer
Files: *
Copyright: 2016, Nils Adermann <naderman@naderman.de>
2016, Jordi Boggiano <j.boggiano@seld.be>
License: Expat
Files: res/cacert.pem
Copyright: 2015, Mozilla Foundation
License: MPL-2.0
Files: src/Composer/Util/RemoteFilesystem.php
src/Composer/Util/TlsHelper.php
Copyright: 2016, Nils Adermann <naderman@naderman.de>
2016, Jordi Boggiano <j.boggiano@seld.be>
2013, Evan Coury <me@evancoury.com>
License: Expat and BSD-2-Clause
License: BSD-2-Clause
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
.
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
License: Expat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
License: MPL-2.0
1. Definitions
--------------
.
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
.
1.3. "Contribution"
means Covered Software of a particular Contributor.
.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
.
1.5. "Incompatible With Secondary Licenses"
means
.
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
.
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
.
1.8. "License"
means this document.
.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
.
1.10. "Modifications"
means any of the following:
.
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
.
(b) any new file in Source Code Form that contains any Covered
Software.
.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
.
2. License Grants and Conditions
--------------------------------
.
2.1. Grants
.
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
.
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
.
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
.
2.2. Effective Date
.
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
.
2.3. Limitations on Grant Scope
.
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
.
(a) for any code that a Contributor has removed from Covered Software;
or
.
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
.
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
.
2.4. Subsequent Licenses
.
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
.
2.5. Representation
.
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
.
2.6. Fair Use
.
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
.
2.7. Conditions
.
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
.
3. Responsibilities
-------------------
.
3.1. Distribution of Source Form
.
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
.
3.2. Distribution of Executable Form
.
If You distribute Covered Software in Executable Form then:
.
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
.
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
.
3.3. Distribution of a Larger Work
.
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
.
3.4. Notices
.
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
.
3.5. Application of Additional Terms
.
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
.
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
.
5. Termination
--------------
.
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
.
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
.
8. Litigation
-------------
.
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
.
9. Miscellaneous
----------------
.
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
.
10. Versions of the License
---------------------------
.
10.1. New Versions
.
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
.
10.2. Effect of New Versions
.
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
.
10.3. Modified Versions
.
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
.
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
.
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
.
Exhibit A - Source Code Form License Notice
-------------------------------------------
.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
.
You may add additional accurate notices of copyright ownership.
.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
.
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

9
vendor/composer/autoload_classmap.php vendored Normal file
View File

@ -0,0 +1,9 @@
<?php
// autoload_classmap.php @generated by Composer
// Returns the explicit class map handed to the class loader: the generated
// bootstrap (autoload_real.php) passes a non-empty result to addClassMap().
// This file lives in vendor/composer/, so $vendorDir resolves to vendor/ and
// $baseDir to the directory that contains vendor/.
$vendorDir = dirname(dirname(__FILE__));
$baseDir = dirname($vendorDir);
// No classes are mapped explicitly, hence the empty array.
return array(
);

View File

@ -0,0 +1,9 @@
<?php
// autoload_namespaces.php @generated by Composer
// Returns the namespace => path map that the generated bootstrap
// (autoload_real.php) feeds, entry by entry, to the class loader's set().
// $vendorDir is two directory levels above this file; $baseDir is its parent.
$vendorDir = dirname(dirname(__FILE__));
$baseDir = dirname($vendorDir);
// No entries are registered here, hence the empty array.
return array(
);

10
vendor/composer/autoload_psr4.php vendored Normal file
View File

@ -0,0 +1,10 @@
<?php
// autoload_psr4.php @generated by Composer
// Returns the PSR-4 map (namespace prefix => list of base directories) that the
// generated bootstrap (autoload_real.php) feeds, entry by entry, to setPsr4().
// This file lives in vendor/composer/, so $vendorDir resolves to vendor/ and
// $baseDir to the directory that contains vendor/.
$vendorDir = dirname(dirname(__FILE__));
$baseDir = dirname($vendorDir);
return array(
// Classes under League\HTMLToMarkdown\ are looked up in the package's src/ directory.
'League\\HTMLToMarkdown\\' => array($vendorDir . '/league/html-to-markdown/src'),
);

45
vendor/composer/autoload_real.php vendored Normal file
View File

@ -0,0 +1,45 @@
<?php
// autoload_real.php @generated by Composer
/**
 * Composer-generated bootstrap that builds, configures and registers the
 * project's class loader exactly once.
 */
class ComposerAutoloaderInit8d9e9c7c059559c5a7982ce8b604d744
{
    /**
     * Loader created by the first getLoader() call and handed back on every later call.
     */
    private static $loader;

    /**
     * Temporary autoload callback that can load Composer's own ClassLoader and nothing else.
     *
     * @param string $class Fully-qualified name of the class being autoloaded.
     */
    public static function loadClassLoader($class)
    {
        if ($class !== 'Composer\Autoload\ClassLoader') {
            return;
        }

        require __DIR__ . '/ClassLoader.php';
    }

    /**
     * Build the class loader from the generated map files and register it.
     *
     * @return \Composer\Autoload\ClassLoader The shared loader instance.
     */
    public static function getLoader()
    {
        if (self::$loader !== null) {
            return self::$loader;
        }

        // The bootstrap callback is only needed while ClassLoader itself is instantiated.
        $bootstrap = array('ComposerAutoloaderInit8d9e9c7c059559c5a7982ce8b604d744', 'loadClassLoader');
        spl_autoload_register($bootstrap, true, true);
        $classLoader = new \Composer\Autoload\ClassLoader();
        self::$loader = $classLoader;
        spl_autoload_unregister($bootstrap);

        // Namespace => path entries from autoload_namespaces.php.
        $prefixes = require __DIR__ . '/autoload_namespaces.php';
        foreach ($prefixes as $prefix => $dirs) {
            $classLoader->set($prefix, $dirs);
        }

        // PSR-4 namespace prefix => base directories from autoload_psr4.php.
        $psr4Prefixes = require __DIR__ . '/autoload_psr4.php';
        foreach ($psr4Prefixes as $prefix => $dirs) {
            $classLoader->setPsr4($prefix, $dirs);
        }

        // Explicit class map; skipped when the generated map is empty.
        $classes = require __DIR__ . '/autoload_classmap.php';
        if ($classes) {
            $classLoader->addClassMap($classes);
        }

        $classLoader->register(true);

        return $classLoader;
    }
}

68
vendor/composer/installed.json vendored Normal file
View File

@ -0,0 +1,68 @@
[
{
"name": "league/html-to-markdown",
"version": "4.2.2",
"version_normalized": "4.2.2.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/html-to-markdown.git",
"reference": "8dfe3b1e6d459b320bec1a4b5499cd9d62796ac0"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/html-to-markdown/zipball/8dfe3b1e6d459b320bec1a4b5499cd9d62796ac0",
"reference": "8dfe3b1e6d459b320bec1a4b5499cd9d62796ac0",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-xml": "*",
"php": ">=5.3.3"
},
"require-dev": {
"mikehaertl/php-shellcommand": "~1.1.0",
"phpunit/phpunit": "4.*",
"scrutinizer/ocular": "~1.1"
},
"time": "2016-09-27 12:38:24",
"bin": [
"bin/html-to-markdown"
],
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.3-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"League\\HTMLToMarkdown\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Colin O'Dell",
"email": "colinodell@gmail.com",
"homepage": "http://www.colinodell.com",
"role": "Lead Developer"
},
{
"name": "Nick Cernis",
"email": "nick@cern.is",
"homepage": "http://modernnerd.net",
"role": "Original Author"
}
],
"description": "An HTML-to-markdown conversion helper for PHP",
"homepage": "https://github.com/thephpleague/html-to-markdown",
"keywords": [
"html",
"markdown"
]
}
]

View File

@ -0,0 +1,170 @@
# Change Log
All notable changes to this project will be documented in this file.
Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) principles.
## [Unreleased][unreleased]
## [4.2.2]
### Fixed
- Fixed sanitization bug which sometimes removes desired content (#63, #101)
## [4.2.1]
### Fixed
- Fixed path to autoload.php when used as a library (#98)
- Fixed edge case for tags containing only whitespace (#99)
### Removed
- Removed double HTML entity decoding, as this is not desirable (#60)
## [4.2.0]
### Added
- Added the ability to invoke HtmlConverter objects as functions (#85)
### Fixed
- Fixed improper handling of nested list items (#19 and #84)
- Fixed preceding or trailing spaces within emphasis tags (#83)
## [4.1.1]
### Fixed
- Fixed conversion of empty paragraphs (#78)
- Fixed `preg_replace` so it wouldn't break UTF-8 characters (#79)
## [4.1.0]
### Added
- Added `bin/html-to-markdown` script
### Changed
- Changed default italic character to `_` (#58)
## [4.0.1]
### Fixed
- Added escaping to avoid * and _ in a text being rendered as emphasis (#48)
### Removed
- Removed the demo (#51)
- `.styleci.yml` and `CONTRIBUTING.md` are no longer included in distributions (#50)
## [4.0.0]
This release changes the visibility of several methods/properties. #42 and #43 brought to light that some visibilities were
not ideally set, so this release fixes that. Moving forwards this should reduce the chance of introducing BC-breaking changes.
### Added
- Added new `HtmlConverter::getEnvironment()` method to expose the `Environment` (#42, #43)
### Changed
- Changed `Environment::addConverter()` from `protected` to `public`, enabling custom converters to be added (#42, #43)
- Changed `HtmlConverter::createDOMDocument()` from `protected` to `private`
- Changed `Element::nextCached` from `protected` to `private`
- Made the `Environment` class `final`
## [3.1.1]
### Fixed
- Empty HTML strings now result in empty Markdown documents (#40, #41)
## [3.1.0]
### Added
- Added new `equals` method to `Element` to check for equality
### Changed
- Use Linux line endings consistently instead of platform-specific line endings (#36)
### Fixed
- Cleaned up code style
## [3.0.0]
### Changed
- Changed namespace to `League\HTMLToMarkdown`
- Changed packagist name to `league/html-to-markdown`
- Re-organized code into several separate classes
- `<a>` tags with identical href and inner text are now rendered using angular bracket syntax (#31)
- `<div>` elements are now treated as block-level elements (#33)
## [2.2.2]
### Added
- Added support for PHP 5.6 and HHVM
- Enabled testing against PHP 7 nightlies
- Added this CHANGELOG.md
### Fixed
- Fixed whitespace preservation between inline elements (#9 and #10)
## [2.2.1]
### Fixed
- Preserve placeholder links (#22)
## [2.2.0]
### Added
- Added CircleCI config
### Changed
- `<pre>` blocks are now treated as code elements
### Removed
- Dropped support for PHP 5.2
- Removed incorrect README comment regarding `#text` nodes (#17)
## [2.1.2]
### Added
- Added the ability to blacklist/remove specific node types (#11)
### Changed
- Line breaks are now placed after divs instead of before them
- Newlines inside of link texts are now removed
- Updated the minimum PHPUnit version to 4.*
## [2.1.1]
### Added
- Added options to customize emphasis characters
## [2.1.0]
### Added
- Added option to strip HTML tags without Markdown equivalents
- Added `convert()` method for converter reuse
- Added ability to set options after instance construction
- Documented the required PHP extensions (#4)
### Changed
- ATX style now used for h1 and h2 tags inside blockquotes
### Fixed
- Newlines inside blockquotes are now started with a bracket
- Fixed some incorrect docblocks
- `__toString()` now returns an empty string if input is empty
- Convert head tag if body tag is empty (#7)
- Preserve special characters inside tags without md equivalents (#6)
## [2.0.1]
### Fixed
- Fixed first line indentation for multi-line code blocks
- Fixed consecutive anchors get separating spaces stripped (#3)
## [2.0.0]
### Added
- Initial release
[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.2.2...master
[4.2.2]: https://github.com/thephpleague/html-to-markdown/compare/4.2.1...4.2.2
[4.2.1]: https://github.com/thephpleague/html-to-markdown/compare/4.2.0...4.2.1
[4.2.0]: https://github.com/thephpleague/html-to-markdown/compare/4.1.1...4.2.0
[4.1.1]: https://github.com/thephpleague/html-to-markdown/compare/4.1.0...4.1.1
[4.1.0]: https://github.com/thephpleague/html-to-markdown/compare/4.0.1...4.1.0
[4.0.1]: https://github.com/thephpleague/html-to-markdown/compare/4.0.0...4.0.1
[4.0.0]: https://github.com/thephpleague/html-to-markdown/compare/3.1.1...4.0.0
[3.1.1]: https://github.com/thephpleague/html-to-markdown/compare/3.1.0...3.1.1
[3.1.0]: https://github.com/thephpleague/html-to-markdown/compare/3.0.0...3.1.0
[3.0.0]: https://github.com/thephpleague/html-to-markdown/compare/2.2.2...3.0.0
[2.2.2]: https://github.com/thephpleague/html-to-markdown/compare/2.2.1...2.2.2
[2.2.1]: https://github.com/thephpleague/html-to-markdown/compare/2.2.0...2.2.1
[2.2.0]: https://github.com/thephpleague/html-to-markdown/compare/2.1.2...2.2.0
[2.1.2]: https://github.com/thephpleague/html-to-markdown/compare/2.1.1...2.1.2
[2.1.1]: https://github.com/thephpleague/html-to-markdown/compare/2.1.0...2.1.1
[2.1.0]: https://github.com/thephpleague/html-to-markdown/compare/2.0.1...2.1.0
[2.0.1]: https://github.com/thephpleague/html-to-markdown/compare/2.0.0...2.0.1
[2.0.0]: https://github.com/thephpleague/html-to-markdown/compare/775f91e...2.0.0

View File

@ -0,0 +1,22 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality.
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery
* Personal attacks
* Trolling or insulting/derogatory comments
* Public or private harassment
* Publishing others' private information, such as physical or electronic addresses, without explicit permission
* Other unethical or unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project. Project maintainers who do not follow or enforce the Code of Conduct may be permanently removed from the project team.
This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/)

22
vendor/league/html-to-markdown/LICENSE vendored Normal file
View File

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Colin O'Dell
Originally created by Nick Cernis
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

151
vendor/league/html-to-markdown/README.md vendored Normal file
View File

@ -0,0 +1,151 @@
HTML To Markdown for PHP
========================
[![Join the chat at https://gitter.im/thephpleague/html-to-markdown](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/thephpleague/html-to-markdown?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Latest Version](https://img.shields.io/packagist/v/league/html-to-markdown.svg?style=flat-square)](https://packagist.org/packages/league/html-to-markdown)
[![Software License](http://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE)
[![Build Status](https://img.shields.io/travis/thephpleague/html-to-markdown/master.svg?style=flat-square)](https://travis-ci.org/thephpleague/html-to-markdown)
[![Coverage Status](https://img.shields.io/scrutinizer/coverage/g/thephpleague/html-to-markdown.svg?style=flat-square)](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown/code-structure)
[![Quality Score](https://img.shields.io/scrutinizer/g/thephpleague/html-to-markdown.svg?style=flat-square)](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown)
[![Total Downloads](https://img.shields.io/packagist/dt/league/html-to-markdown.svg?style=flat-square)](https://packagist.org/packages/league/html-to-markdown)
Library which converts HTML to [Markdown](http://daringfireball.net/projects/markdown/) for your sanity and convenience.
**Requires**: PHP 5.3+
**Lead Developer**: [@colinodell](http://twitter.com/colinodell)
**Original Author**: [@nickcernis](http://twitter.com/nickcernis)
### Why convert HTML to Markdown?
*"What alchemy is this?"* you mutter. *"I can see why you'd convert [Markdown to HTML](https://github.com/thephpleague/commonmark),"* you continue, already labouring the question somewhat, *"but why go the other way?"*
Typically you would convert HTML to Markdown if:
1. You have an existing HTML document that needs to be edited by people with good taste.
2. You want to store new content in HTML format but edit it as Markdown.
3. You want to convert HTML email to plain text email.
4. You know a guy who's been converting HTML to Markdown for years, and now he can speak Elvish. You'd quite like to be able to speak Elvish.
5. You just really like Markdown.
### How to use it
Require the library by issuing this command:
```bash
composer require league/html-to-markdown
```
Add `require 'vendor/autoload.php';` to the top of your script.
Next, create a new HtmlConverter instance, passing in your valid HTML code to its `convert()` function:
use League\HTMLToMarkdown\HtmlConverter;
$converter = new HtmlConverter();
$html = "<h3>Quick, to the Batpoles!</h3>";
$markdown = $converter->convert($html);
The `$markdown` variable now contains the Markdown version of your HTML as a string:
echo $markdown; // ==> ### Quick, to the Batpoles!
Releases before 4.0.1 included a `demo` directory containing an HTML->Markdown conversion form to try out; the demo is no longer shipped with the library.
### Conversion options
By default, HTML To Markdown preserves HTML tags without Markdown equivalents, like `<span>` and `<div>`.
To strip HTML tags that don't have a Markdown equivalent while preserving the content inside them, set `strip_tags` to true, like this:
$converter = new HtmlConverter(array('strip_tags' => true));
$html = '<span>Turnips!</span>';
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
Or more explicitly, like this:
$converter = new HtmlConverter();
$converter->getConfig()->setOption('strip_tags', true);
$html = '<span>Turnips!</span>';
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
Note that only the tags themselves are stripped, not the content they hold.
To strip tags and their content, pass a space-separated list of tags in `remove_nodes`, like this:
$converter = new HtmlConverter(array('remove_nodes' => 'span div'));
$html = '<span>Turnips!</span><div>Monkeys!</div>';
$markdown = $converter->convert($html); // $markdown now contains ""
### Style options
By default, bold tags are converted using the asterisk syntax and italic tags using the underscore syntax (the italic default changed to `_` in 4.1.0). Change these using the `bold_style` and `italic_style` options.
$converter = new HtmlConverter();
$converter->getConfig()->setOption('italic_style', '_');
$converter->getConfig()->setOption('bold_style', '__');
$html = '<em>Italic</em> and a <strong>bold</strong>';
$markdown = $converter->convert($html); // $markdown now contains "_Italic_ and a __bold__"
### Limitations
- Markdown Extra, MultiMarkdown and other variants aren't supported – just Markdown.
### Known issues
- Nested lists and lists containing multiple paragraphs aren't converted correctly.
- Lists inside blockquotes aren't converted correctly.
- Any reported [open issues here](https://github.com/thephpleague/html-to-markdown/issues?state=open).
[Report your issue or request a feature here.](https://github.com/thephpleague/html-to-markdown/issues/new) Issues with patches or failing tests are especially welcome.
### Style notes
- Setext (underlined) headers are the default for H1 and H2. If you prefer the ATX style for H1 and H2 (# Header 1 and ## Header 2), set `header_style` to 'atx' in the options array when you instantiate the object:
`$converter = new HtmlConverter(array('header_style'=>'atx'));`
Headers of H3 priority and lower always use atx style.
- Links and images are referenced inline. Footnote references (where image src and anchor href attributes are listed in the footnotes) are not used.
- Blockquotes aren't line wrapped – it makes the converted Markdown easier to edit.
### Dependencies
HTML To Markdown requires PHP's [xml](http://www.php.net/manual/en/xml.installation.php), [lib-xml](http://www.php.net/manual/en/libxml.installation.php), and [dom](http://www.php.net/manual/en/dom.installation.php) extensions, all of which are enabled by default on most distributions.
Errors such as "Fatal error: Class 'DOMDocument' not found" on distributions such as CentOS that disable PHP's xml extension can be resolved by installing php-xml.
### Contributors
Many thanks to all [contributors](https://github.com/thephpleague/html-to-markdown/graphs/contributors) so far. Further improvements and feature suggestions are very welcome.
### How it works
HTML To Markdown creates a DOMDocument from the supplied HTML, walks through the tree, and converts each node to a text node containing the equivalent markdown, starting from the most deeply nested node and working outwards towards the root node.
### To-do
- Support for nested lists and lists inside blockquotes.
- Offer an option to preserve tags as HTML if they contain attributes that can't be represented with Markdown (e.g. `style`).
### Trying to convert Markdown to HTML?
Use one of these great libraries:
- [league/commonmark](https://github.com/thephpleague/commonmark) (recommended)
- [cebe/markdown](https://github.com/cebe/markdown)
- [PHP Markdown](https://michelf.ca/projects/php-markdown/)
- [Parsedown](https://github.com/erusev/parsedown)
No guarantees about the Elvish, though.

View File

@ -0,0 +1,108 @@
#!/usr/bin/env php
<?php
// Load Composer's autoloader before any library class is referenced.
requireAutoloader();

// Send PHP's own error output to STDERR so it never mixes with the Markdown written to STDOUT.
ini_set('display_errors', 'stderr');

// Parse the command line. Index 0 is the script name and is skipped.
foreach ($argv as $i => $arg) {
    if ($i === 0) {
        continue;
    }

    if (substr($arg, 0, 1) === '-') {
        switch ($arg) {
            case '-h':
            case '--help':
                echo getHelpText();
                exit(0);
            default:
                fail('Unknown option: ' . $arg);
        }
    } else {
        // Every option above terminates the script, so the first positional
        // argument (the input file) is always found at index 1.
        $src = $argv[1];
    }
}

if (isset($src)) {
    // Input comes from the file named on the command line.
    if (!file_exists($src)) {
        fail('File not found: ' . $src);
    }

    $html = file_get_contents($src);
    if ($html === false) {
        // Without this check an unreadable path (or a directory) would hand
        // `false` to the converter and exit successfully with empty output.
        fail('Unable to read file: ' . $src);
    }
} else {
    // No file given: take whatever is already available on STDIN. The stream is
    // switched to non-blocking so an interactive run without piped input falls
    // through to the help text instead of waiting for input.
    $stdin = fopen('php://stdin', 'r');
    stream_set_blocking($stdin, false);
    $html = stream_get_contents($stdin);
    fclose($stdin);

    if (empty($html)) {
        fail(getHelpText());
    }
}

$converter = new League\HTMLToMarkdown\HtmlConverter();
echo $converter->convert($html);
/**
 * Build the usage/help message printed for -h/--help and when no input is supplied.
 *
 * @return string The complete help text.
 */
function getHelpText()
{
    // Nowdoc: the text contains nothing to interpolate, so it is emitted verbatim.
    $usage = <<<'HELP'
HTML To Markdown
Usage: html-to-markdown [OPTIONS] [FILE]
-h, --help Shows help and usage information
If no file is given, input will be read from STDIN
Examples:
Converting a file named document.html:
html-to-markdown document.html
Converting a file and saving its output:
html-to-markdown document.html > output.md
Converting from STDIN:
echo -e '<h1>Hello World!</h1>' | html-to-markdown
Converting from STDIN and saving the output:
echo -e '<h1>Hello World!</h1>' | html-to-markdown > output.md
HELP;

    return $usage;
}
/**
 * Report an error on STDERR and terminate the script with exit status 1.
 *
 * @param string $message Error message
 */
function fail($message)
{
    $line = $message . "\n";
    fputs(STDERR, $line);

    exit(1);
}
/**
 * Load the first Composer autoloader that exists among the known locations.
 *
 * Nothing is loaded (and no error is raised) when none of the candidates exists.
 */
function requireAutoloader()
{
    $candidates = array(
        // Local package usage
        __DIR__ . '/../vendor/autoload.php',
        // Package was included as a library
        __DIR__ . '/../../../autoload.php',
    );

    foreach ($candidates as $candidate) {
        if (!file_exists($candidate)) {
            continue;
        }

        require_once $candidate;

        return;
    }
}

View File

@ -0,0 +1,48 @@
{
"name": "league/html-to-markdown",
"type": "library",
"description": "An HTML-to-markdown conversion helper for PHP",
"keywords": ["markdown", "html"],
"homepage": "https://github.com/thephpleague/html-to-markdown",
"license": "MIT",
"authors": [
{
"name": "Colin O'Dell",
"email": "colinodell@gmail.com",
"homepage": "http://www.colinodell.com",
"role": "Lead Developer"
},
{
"name": "Nick Cernis",
"email": "nick@cern.is",
"homepage": "http://modernnerd.net",
"role": "Original Author"
}
],
"autoload": {
"psr-4": {
"League\\HTMLToMarkdown\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"League\\HTMLToMarkdown\\Test\\": "tests"
}
},
"require": {
"php": ">=5.3.3",
"ext-dom": "*",
"ext-xml": "*"
},
"require-dev": {
"mikehaertl/php-shellcommand": "~1.1.0",
"phpunit/phpunit": "4.*",
"scrutinizer/ocular": "~1.1"
},
"bin": ["bin/html-to-markdown"],
"extra": {
"branch-alias": {
"dev-master": "4.3-dev"
}
}
}

View File

@ -0,0 +1,60 @@
<?php
namespace League\HTMLToMarkdown;
/**
 * Key/value store holding the converter's options.
 */
class Configuration
{
    /**
     * @var array Option name => option value.
     */
    protected $config;

    /**
     * @param array $config Initial options.
     */
    public function __construct(array $config = array())
    {
        $this->config = $config;
    }

    /**
     * Recursively overlay the given options on top of the current ones.
     *
     * @param array $config Options to merge in; on conflict these values win.
     */
    public function merge(array $config = array())
    {
        $merged = array_replace_recursive($this->config, $config);
        $this->config = $merged;
    }

    /**
     * Discard every current option and use the given set instead.
     *
     * @param array $config The new, complete set of options.
     */
    public function replace(array $config = array())
    {
        $this->config = $config;
    }

    /**
     * Set a single option.
     *
     * @param string $key
     * @param mixed  $value
     */
    public function setOption($key, $value)
    {
        $this->config[$key] = $value;
    }

    /**
     * Read one option, or all of them when no key is given.
     *
     * An option whose stored value is null is treated like a missing one,
     * so the default is returned for it.
     *
     * @param string|null $key     Option name, or null to fetch the whole option array.
     * @param mixed|null  $default Value returned when the option is not set.
     *
     * @return mixed|null
     */
    public function getOption($key = null, $default = null)
    {
        if ($key === null) {
            return $this->config;
        }

        return isset($this->config[$key]) ? $this->config[$key] : $default;
    }
}

View File

@ -0,0 +1,11 @@
<?php
namespace League\HTMLToMarkdown;
/**
* Marks an object that accepts a Configuration instance through setConfig().
*/
interface ConfigurationAwareInterface
{
/**
* Hand the configuration to the implementing object.
*
* @param Configuration $config The options the implementation should read from.
*/
public function setConfig(Configuration $config);
}

View File

@ -0,0 +1,44 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
/**
 * Converts <blockquote> elements into Markdown quote blocks.
 */
class BlockquoteConverter implements ConverterInterface
{
    /**
     * Prefix every line of the element's value with "> " and end the block with a blank line.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        // The inner content is expected to be Markdown already at this point,
        // so only the quote markers have to be added.
        $content = trim($element->getValue());

        $quoted = array();
        foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
            $quoted[] = '> ' . $line;
        }

        // One newline terminates the last quoted line, the second one leaves a blank line after the block.
        return implode("\n", $quoted) . "\n\n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('blockquote');
    }
}

View File

@ -0,0 +1,26 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
/**
* Converter for '#comment' nodes: it replaces them with an empty string,
* so comment content never appears in the converted output.
*/
class CommentConverter implements ConverterInterface
{
/**
* Always yields an empty string; the element itself is not inspected.
*
* @param ElementInterface $element
*
* @return string
*/
public function convert(ElementInterface $element)
{
return '';
}
/**
* @return string[] Only the '#comment' node name.
*/
public function getSupportedTags()
{
return array('#comment');
}
}

View File

@ -0,0 +1,20 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
/**
* Contract for an object that turns one kind of node into its Markdown text.
*/
interface ConverterInterface
{
/**
* Produce the Markdown text for the given element.
*
* @param ElementInterface $element The node to convert.
*
* @return string The text that stands in for the element.
*/
public function convert(ElementInterface $element);
/**
* List the node names this converter handles.
*
* @return string[] Tag names such as 'blockquote' or 'br', or node names such as '#comment'.
*/
public function getSupportedTags();
}

View File

@ -0,0 +1,50 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\Configuration;
use League\HTMLToMarkdown\ConfigurationAwareInterface;
use League\HTMLToMarkdown\ElementInterface;
/**
 * Converter registered under the '_default' pseudo-tag.
 * NOTE(review): presumably used for tags that have no dedicated converter — confirm against the dispatcher.
 */
class DefaultConverter implements ConverterInterface, ConfigurationAwareInterface
{
    const DEFAULT_CONVERTER = '_default';

    /**
     * @var Configuration
     */
    protected $config;

    /**
     * @param Configuration $config
     */
    public function setConfig(Configuration $config)
    {
        $this->config = $config;
    }

    /**
     * Either keep the element's own string form or reduce it to its value,
     * depending on the `strip_tags` option.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $stripTags = $this->config->getOption('strip_tags', false);

        if (!$stripTags) {
            // strip_tags is off (the default): preserve tags that don't have Markdown
            // equivalents, such as <span> nodes on their own, entity-decoded.
            return html_entity_decode($element->getChildrenAsString());
        }

        // strip_tags is on: only the element's value survives.
        return $element->getValue();
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array(self::DEFAULT_CONVERTER);
    }
}

View File

@ -0,0 +1,45 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\Configuration;
use League\HTMLToMarkdown\ConfigurationAwareInterface;
use League\HTMLToMarkdown\ElementInterface;
/**
 * Converter for <div> elements.
 */
class DivConverter implements ConverterInterface, ConfigurationAwareInterface
{
    /**
     * @var Configuration
     */
    protected $config;

    /**
     * @param Configuration $config
     */
    public function setConfig(Configuration $config)
    {
        $this->config = $config;
    }

    /**
     * Keep the div's own string form, or - when `strip_tags` is enabled -
     * emit only its value followed by a blank line.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $stripTags = $this->config->getOption('strip_tags', false);

        if (!$stripTags) {
            // strip_tags is off (the default): return the element's string form, entity-decoded.
            return html_entity_decode($element->getChildrenAsString());
        }

        // The trailing blank line separates the stripped div's value from what follows.
        $separator = "\n\n";

        return $element->getValue() . $separator;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('div');
    }
}

View File

@ -0,0 +1,57 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\Configuration;
use League\HTMLToMarkdown\ConfigurationAwareInterface;
use League\HTMLToMarkdown\ElementInterface;
class EmphasisConverter implements ConverterInterface, ConfigurationAwareInterface
{
    /**
     * Configuration the 'italic_style' and 'bold_style' options are read from.
     *
     * @var Configuration
     */
    protected $config;

    /**
     * Remember the configuration to consult during conversion.
     *
     * @param Configuration $config
     */
    public function setConfig(Configuration $config)
    {
        $this->config = $config;
    }

    /**
     * Wrap the element's text in the configured italic or bold marker.
     *
     * <i> and <em> use the 'italic_style' option, every other supported tag
     * (<strong>, <b>) uses 'bold_style'. Whitespace at either edge of the text
     * is moved outside the markers as a single space.
     *
     * @param ElementInterface $element
     *
     * @return string The emphasised text, or '' when the element holds only whitespace
     */
    public function convert(ElementInterface $element)
    {
        $tag = $element->getTagName();
        $value = $element->getValue();

        // Compare against '' explicitly: a plain truthiness test would also
        // treat the text "0" as empty and silently drop <strong>0</strong>.
        if (trim($value) === '') {
            return '';
        }

        if ($tag === 'i' || $tag === 'em') {
            $style = $this->config->getOption('italic_style');
        } else {
            $style = $this->config->getOption('bold_style');
        }

        // Markers must hug the text, so leading/trailing whitespace is
        // re-emitted outside of them.
        $prefix = ltrim($value) !== $value ? ' ' : '';
        $suffix = rtrim($value) !== $value ? ' ' : '';

        return $prefix . $style . trim($value) . $style . $suffix;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('em', 'i', 'strong', 'b');
    }
}

View File

@ -0,0 +1,26 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class HardBreakConverter implements ConverterInterface
{
    /**
     * Convert a <br> into a Markdown hard line break.
     *
     * @param ElementInterface $element Unused; every <br> converts the same way
     *
     * @return string Two trailing spaces followed by a newline
     */
    public function convert(ElementInterface $element)
    {
        // A Markdown hard line break needs at least TWO spaces before the
        // newline; with a single space the line ending is only a soft break
        // and the <br> would be lost in the rendered output.
        return "  \n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('br');
    }
}

View File

@ -0,0 +1,78 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\Configuration;
use League\HTMLToMarkdown\ConfigurationAwareInterface;
use League\HTMLToMarkdown\ElementInterface;
class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface
{
    const STYLE_ATX = 'atx';
    const STYLE_SETEXT = 'setext';

    /**
     * Configuration the 'header_style' option is read from.
     *
     * @var Configuration
     */
    protected $config;

    /**
     * Remember the configuration to consult during conversion.
     *
     * @param Configuration $config
     */
    public function setConfig(Configuration $config)
    {
        $this->config = $config;
    }

    /**
     * Convert an <h1>..<h6> element into a Markdown header.
     *
     * Setext (underlined) output is used only for levels 1 and 2, outside of a
     * blockquote, and when 'header_style' is 'setext' (the fallback value);
     * every other case produces an ATX ("#"-prefixed) header.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        // The level is the digit following the "h" in the tag name.
        $tagName = $element->getTagName();
        $level = (int) substr($tagName, 1, 1);
        $style = $this->config->getOption('header_style', self::STYLE_SETEXT);

        $setextEligible = ($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote');
        if ($setextEligible && $style === self::STYLE_SETEXT) {
            return $this->createSetextHeader($level, $element->getValue());
        }

        return $this->createAtxHeader($level, $element->getValue());
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
    }

    /**
     * Build an underlined header: "=" for level 1, "-" for any other level.
     *
     * @param int    $level
     * @param string $content
     *
     * @return string
     */
    private function createSetextHeader($level, $content)
    {
        // Prefer the multibyte length so the underline matches the number of
        // characters rather than the number of bytes.
        if (function_exists('mb_strlen')) {
            $width = mb_strlen($content, 'utf-8');
        } else {
            $width = strlen($content);
        }

        $rule = '-';
        if ($level === 1) {
            $rule = '=';
        }

        return $content . "\n" . str_repeat($rule, $width) . "\n\n";
    }

    /**
     * Build a header prefixed with one "#" per level.
     *
     * @param int    $level
     * @param string $content
     *
     * @return string
     */
    private function createAtxHeader($level, $content)
    {
        return str_repeat('#', $level) . ' ' . $content . "\n\n";
    }
}

View File

@ -0,0 +1,26 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class HorizontalRuleConverter implements ConverterInterface
{
    /**
     * Convert an <hr> into a fixed Markdown rule followed by a blank line.
     *
     * @param ElementInterface $element Unused; every <hr> converts the same way
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $rule = "- - - - - -\n\n";

        return $rule;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('hr');

        return $tags;
    }
}

View File

@ -0,0 +1,37 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class ImageConverter implements ConverterInterface
{
    /**
     * Convert an <img> into Markdown image syntax.
     *
     * The quoted title part is emitted only when the title attribute is not
     * the empty string. No newlines are added: an <img> is expected to sit
     * inside a block-level element.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $source = $element->getAttribute('src');
        $altText = $element->getAttribute('alt');
        $caption = $element->getAttribute('title');

        $captionPart = '';
        if ($caption !== '') {
            $captionPart = ' "' . $caption . '"';
        }

        return '![' . $altText . '](' . $source . $captionPart . ')';
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('img');

        return $tags;
    }
}

View File

@ -0,0 +1,42 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class LinkConverter implements ConverterInterface
{
    /**
     * Convert an <a> into Markdown link syntax.
     *
     * - falsy href: the anchor is kept as markup (serialised, entity-decoded);
     * - non-empty title: inline link with a quoted title;
     * - href identical to the link text: autolink in angle brackets;
     * - otherwise: plain inline link.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $target = $element->getAttribute('href');
        $caption = $element->getAttribute('title');
        $label = $element->getValue();

        // Without a usable href there is nothing to link to; this case wins
        // over every other form.
        if (!$target) {
            return html_entity_decode($element->getChildrenAsString());
        }

        if ($caption !== '') {
            return '[' . $label . '](' . $target . ' "' . $caption . '")';
        }

        if ($target === $label) {
            return '<' . $target . '>';
        }

        return '[' . $label . '](' . $target . ')';
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('a');

        return $tags;
    }
}

View File

@ -0,0 +1,26 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class ListBlockConverter implements ConverterInterface
{
    /**
     * Convert an <ol>/<ul> container: its node value followed by one newline.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $items = $element->getValue();

        return $items . "\n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('ol', 'ul');

        return $tags;
    }
}

View File

@ -0,0 +1,45 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class ListItemConverter implements ConverterInterface
{
    /**
     * Convert an <li> into one Markdown list line.
     *
     * Items whose parent is a <ul> get a dash marker; any other parent gets
     * the item's sibling position as a number. Nested items are indented by
     * their nesting level.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $parentTag = $element->getParent()->getTagName();
        $content = trim($element->getValue());

        // One indentation unit per enclosing list item.
        $depth = $element->getListItemLevel($element);
        $indent = str_repeat(' ', $depth);

        // The first item of a nested list starts on its own line.
        if ($depth > 0 && $element->getSiblingPosition() === 1) {
            $indent = "\n" . $indent;
        }

        if ($parentTag === 'ul') {
            $marker = '- ';
        } else {
            $marker = $element->getSiblingPosition() . '. ';
        }

        return $indent . $marker . $content . "\n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('li');

        return $tags;
    }
}

View File

@ -0,0 +1,28 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class ParagraphConverter implements ConverterInterface
{
    /**
     * Convert a <p>: its right-trimmed value followed by a blank line, or ''
     * when the paragraph contains nothing but whitespace.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $text = $element->getValue();

        if (trim($text) === '') {
            return '';
        }

        return rtrim($text) . "\n\n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('p');

        return $tags;
    }
}

View File

@ -0,0 +1,73 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class PreformattedConverter implements ConverterInterface
{
    /**
     * Convert a <pre> or <code> element.
     *
     * A single-line <code> becomes a backtick code span. A <pre>, or any
     * multi-line content, becomes an indented code block: every line is
     * prefixed with four spaces, blank first/last lines are dropped, and a
     * <pre> block is additionally surrounded by newlines.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        // Work on the serialised element, minus the literal wrapper tags.
        $code_content = html_entity_decode($element->getChildrenAsString());
        $code_content = str_replace(array('<code>', '</code>', '<pre>', '</pre>'), '', $code_content);

        $lines = preg_split('/\r\n|\r|\n/', $code_content);
        $is_pre = $element->getTagName() === 'pre';

        // A single line outside of <pre> is a code span, not a block.
        if (count($lines) === 1 && !$is_pre) {
            return '`' . $lines[0] . '`';
        }

        // Remove the first and last line if they're empty.
        if ($this->isBlankLine($lines[0])) {
            array_shift($lines);
        }
        if (count($lines) > 0 && $this->isBlankLine($lines[count($lines) - 1])) {
            array_pop($lines);
        }

        // Markdown only recognises an indented code block at four spaces.
        $indented = array();
        foreach ($lines as $line) {
            $indented[] = '    ' . str_replace('&#xD;', '', $line);
        }

        // Joining avoids the separator after the final line; the line count
        // may have changed above, so it must not be compared to the old total.
        $markdown = implode("\n", $indented) . "\n";

        if ($is_pre) {
            $markdown = "\n" . $markdown . "\n";
        }

        return $markdown;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('pre', 'code');
    }

    /**
     * Tell whether a line holds only whitespace and XML-style carriage returns.
     *
     * The entity is removed as a whole string (trim()'s second argument is a
     * character list and would also eat '#', 'x', 'D', '&' and ';'), and the
     * result is compared to '' so that a line containing "0" is not discarded.
     *
     * @param string $line
     *
     * @return bool
     */
    private function isBlankLine($line)
    {
        return trim(str_replace('&#xD;', '', $line)) === '';
    }
}

View File

@ -0,0 +1,42 @@
<?php
namespace League\HTMLToMarkdown\Converter;
use League\HTMLToMarkdown\ElementInterface;
class TextConverter implements ConverterInterface
{
    /**
     * Convert a text node: collapse whitespace and escape Markdown syntax.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        // Collapse every run of whitespace into a single space.
        $text = preg_replace('~\s+~u', ' ', $element->getValue());
        // Escape the following characters: '*', '_' and '\'
        $text = preg_replace('~([*_\\\\])~u', '\\\\$1', $text);
        // Escape a '#' at the very start of the text.
        $text = preg_replace('~^#~u', '\\\\#', $text);

        if ($text !== ' ') {
            return $text;
        }

        // A lone space is dropped when nothing follows it or when the next
        // node is a block element.
        $following = $element->getNext();
        $dropSpace = !$following || $following->isBlock();

        return $dropSpace ? '' : $text;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        $tags = array('#text');

        return $tags;
    }
}

View File

@ -0,0 +1,252 @@
<?php
namespace League\HTMLToMarkdown;
class Element implements ElementInterface
{
    /**
     * The wrapped DOM node.
     *
     * @var \DOMNode
     */
    protected $node;

    /**
     * Result of the first successful getNext() lookup.
     *
     * @var ElementInterface|null
     */
    private $nextCached;

    /**
     * @param \DOMNode $node The DOM node to wrap
     */
    public function __construct(\DOMNode $node)
    {
        $this->node = $node;
    }

    /**
     * Tell whether the node name is one of the tags treated as block-level.
     *
     * @return bool
     */
    public function isBlock()
    {
        $blockTags = array(
            'blockquote', 'body', 'code', 'div',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'hr', 'html', 'li', 'p', 'ol', 'ul',
        );

        return in_array($this->getTagName(), $blockTags, true);
    }

    /**
     * @return bool True when the node is a '#text' node
     */
    public function isText()
    {
        return $this->getTagName() === '#text';
    }

    /**
     * @return bool True when the node is a '#text' node holding only whitespace
     */
    public function isWhitespace()
    {
        return $this->getTagName() === '#text' && trim($this->getValue()) === '';
    }

    /**
     * @return string The node name (for example 'p' or '#text')
     */
    public function getTagName()
    {
        return $this->node->nodeName;
    }

    /**
     * @return string The node value
     */
    public function getValue()
    {
        return $this->node->nodeValue;
    }

    /**
     * Wrap the parent node.
     *
     * @return ElementInterface|null Null when the node has no parent
     */
    public function getParent()
    {
        // Test the raw node: "new static(...) ?: null" can never yield null
        // because an object is always truthy, and handing a null parent to the
        // \DOMNode-typed constructor is an error.
        $parentNode = $this->node->parentNode;
        if ($parentNode === null) {
            return null;
        }

        return new static($parentNode);
    }

    /**
     * @return bool
     */
    public function hasChildren()
    {
        return $this->node->hasChildNodes();
    }

    /**
     * Wrap every child node, in document order.
     *
     * @return ElementInterface[]
     */
    public function getChildren()
    {
        $ret = array();
        /** @var \DOMNode $node */
        foreach ($this->node->childNodes as $node) {
            $ret[] = new static($node);
        }

        return $ret;
    }

    /**
     * Wrap the node that follows this one: first child, else next sibling,
     * else the next sibling of the closest ancestor that has one.
     *
     * @return ElementInterface|null Null when no such node exists
     */
    public function getNext()
    {
        if ($this->nextCached === null) {
            $nextNode = $this->getNextNode($this->node);
            if ($nextNode !== null) {
                $this->nextCached = new static($nextNode);
            }
        }

        return $this->nextCached;
    }

    /**
     * @param \DomNode $node
     * @param bool     $checkChildren Whether the node's first child is a candidate
     *
     * @return \DomNode|null
     */
    private function getNextNode($node, $checkChildren = true)
    {
        if ($checkChildren && $node->firstChild) {
            return $node->firstChild;
        } elseif ($node->nextSibling) {
            return $node->nextSibling;
        } elseif ($node->parentNode) {
            // Children of the parent were already visited; only look sideways/up.
            return $this->getNextNode($node->parentNode, false);
        }

        return null;
    }

    /**
     * Tell whether any ancestor has one of the given node names.
     *
     * @param string[]|string $tagNames
     *
     * @return bool
     */
    public function isDescendantOf($tagNames)
    {
        if (!is_array($tagNames)) {
            $tagNames = array($tagNames);
        }

        // parentNode is null once the top of the tree is reached.
        for ($p = $this->node->parentNode; $p !== null; $p = $p->parentNode) {
            if (in_array($p->nodeName, $tagNames, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replace this node, inside its parent, with a text node holding $markdown.
     *
     * @param string $markdown
     */
    public function setFinalMarkdown($markdown)
    {
        $markdown_node = $this->node->ownerDocument->createTextNode($markdown);
        $this->node->parentNode->replaceChild($markdown_node, $this->node);
    }

    /**
     * Serialise the wrapped node with DOMNode::C14N().
     *
     * @return string
     */
    public function getChildrenAsString()
    {
        return $this->node->C14N();
    }

    /**
     * Position of this node among its parent's children, counting from 1 and
     * ignoring whitespace-only text nodes.
     *
     * @return int
     */
    public function getSiblingPosition()
    {
        $position = 0;

        // Loop through all nodes and find the given $node
        foreach ($this->getParent()->getChildren() as $current_node) {
            if (!$current_node->isWhitespace()) {
                $position++;
            }

            // TODO: Need a less-buggy way of comparing these
            // Perhaps we can somehow ensure that we always have the exact same object and use === instead?
            if ($this->equals($current_node)) {
                break;
            }
        }

        return $position;
    }

    /**
     * Count the <li> ancestors of this node.
     *
     * @return int
     */
    public function getListItemLevel()
    {
        $level = 0;
        $parent = $this->getParent();

        // Stop at the topmost node (the one without a parent of its own).
        while ($parent !== null && $parent->node->parentNode) {
            if ($parent->getTagName() === 'li') {
                $level++;
            }
            $parent = $parent->getParent();
        }

        return $level;
    }

    /**
     * @param string $name
     *
     * @return string The attribute value, or '' when the node is not a \DOMElement
     */
    public function getAttribute($name)
    {
        if ($this->node instanceof \DOMElement) {
            return $this->node->getAttribute($name);
        }

        return '';
    }

    /**
     * Tell whether the given element wraps the same DOM node as this one.
     *
     * @param ElementInterface $element
     *
     * @return bool
     */
    public function equals(ElementInterface $element)
    {
        if ($element instanceof self) {
            return $element->node === $this->node;
        }

        return $element === $this;
    }
}

View File

@ -0,0 +1,80 @@
<?php
namespace League\HTMLToMarkdown;
/**
* Read/replace access to a single node of the document being converted.
*/
interface ElementInterface
{
/**
* Tell whether the element is treated as block-level.
*
* @return bool
*/
public function isBlock();
/**
* Tell whether the element is a text node.
*
* @return bool
*/
public function isText();
/**
* Tell whether the element is a text node containing only whitespace.
*
* @return bool
*/
public function isWhitespace();
/**
* Get the node name of the element (for example 'p' or '#text').
*
* @return string
*/
public function getTagName();
/**
* Get the value of the element.
*
* @return string
*/
public function getValue();
/**
* Get the parent of the element.
*
* @return ElementInterface|null
*/
public function getParent();
/**
* Tell whether any ancestor of the element has one of the given node names.
*
* @param string|string[] $tagNames A single node name or a list of them
*
* @return bool
*/
public function isDescendantOf($tagNames);
/**
* Tell whether the element has child nodes.
*
* @return bool
*/
public function hasChildren();
/**
* Get the children of the element.
*
* @return ElementInterface[]
*/
public function getChildren();
/**
* Get the element that follows this one, if any.
*
* @return ElementInterface|null
*/
public function getNext();
/**
* Get the position of the element among its siblings.
*
* @return int
*/
public function getSiblingPosition();
/**
* Get a string serialisation of the element.
*
* @return string
*/
public function getChildrenAsString();
/**
* Replace the element with its final Markdown text.
*
* @param string $markdown
*/
public function setFinalMarkdown($markdown);
/**
* Get the value of an attribute of the element.
*
* @param string $name Attribute name
*
* @return string
*/
public function getAttribute($name);
}

View File

@ -0,0 +1,102 @@
<?php
namespace League\HTMLToMarkdown;
use League\HTMLToMarkdown\Converter\BlockquoteConverter;
use League\HTMLToMarkdown\Converter\CommentConverter;
use League\HTMLToMarkdown\Converter\ConverterInterface;
use League\HTMLToMarkdown\Converter\DefaultConverter;
use League\HTMLToMarkdown\Converter\DivConverter;
use League\HTMLToMarkdown\Converter\EmphasisConverter;
use League\HTMLToMarkdown\Converter\HardBreakConverter;
use League\HTMLToMarkdown\Converter\HeaderConverter;
use League\HTMLToMarkdown\Converter\HorizontalRuleConverter;
use League\HTMLToMarkdown\Converter\ImageConverter;
use League\HTMLToMarkdown\Converter\LinkConverter;
use League\HTMLToMarkdown\Converter\ListBlockConverter;
use League\HTMLToMarkdown\Converter\ListItemConverter;
use League\HTMLToMarkdown\Converter\ParagraphConverter;
use League\HTMLToMarkdown\Converter\PreformattedConverter;
use League\HTMLToMarkdown\Converter\TextConverter;
final class Environment
{
    /**
     * Configuration shared with every configuration-aware converter.
     *
     * @var Configuration
     */
    protected $config;

    /**
     * Registered converters, keyed by the tag they handle.
     *
     * @var ConverterInterface[]
     */
    protected $converters = array();

    /**
     * Build an environment that only knows the default (fallback) converter.
     *
     * @param array $config Initial configuration options
     */
    public function __construct(array $config = array())
    {
        $this->config = new Configuration($config);
        $this->addConverter(new DefaultConverter());
    }

    /**
     * @return Configuration
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Register a converter for every tag it supports.
     *
     * A converter registered later replaces an earlier one for the same tag.
     *
     * @param ConverterInterface $converter
     */
    public function addConverter(ConverterInterface $converter)
    {
        // Hand the configuration over first, to converters that ask for it.
        if ($converter instanceof ConfigurationAwareInterface) {
            $converter->setConfig($this->config);
        }

        $supported = $converter->getSupportedTags();
        foreach ($supported as $tagName) {
            $this->converters[$tagName] = $converter;
        }
    }

    /**
     * Look up the converter for a tag, falling back to the default converter.
     *
     * @param string $tag
     *
     * @return ConverterInterface
     */
    public function getConverterByTag($tag)
    {
        $key = isset($this->converters[$tag]) ? $tag : DefaultConverter::DEFAULT_CONVERTER;

        return $this->converters[$key];
    }

    /**
     * Build an environment with all the bundled converters registered.
     *
     * @param array $config
     *
     * @return Environment
     */
    public static function createDefaultEnvironment(array $config = array())
    {
        $environment = new static($config);

        $bundled = array(
            new BlockquoteConverter(),
            new CommentConverter(),
            new DivConverter(),
            new EmphasisConverter(),
            new HardBreakConverter(),
            new HeaderConverter(),
            new HorizontalRuleConverter(),
            new ImageConverter(),
            new LinkConverter(),
            new ListBlockConverter(),
            new ListItemConverter(),
            new ParagraphConverter(),
            new PreformattedConverter(),
            new TextConverter(),
        );

        foreach ($bundled as $converter) {
            $environment->addConverter($converter);
        }

        return $environment;
    }
}

View File

@ -0,0 +1,227 @@
<?php
namespace League\HTMLToMarkdown;
/**
* Class HtmlConverter
*
* A helper class to convert HTML to Markdown.
*
* @author Colin O'Dell <colinodell@gmail.com>
* @author Nick Cernis <nick@cern.is>
*
* @link https://github.com/thephpleague/html-to-markdown/ Latest version on GitHub.
*
* @license http://www.opensource.org/licenses/mit-license.php MIT
*/
class HtmlConverter
{
    /**
     * Environment holding the configuration and the registered converters.
     *
     * @var Environment
     */
    protected $environment;

    /**
     * Constructor
     *
     * @param array $options Configuration options, merged over the defaults below
     */
    public function __construct(array $options = array())
    {
        $defaults = array(
            'header_style'    => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
            'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
            'strip_tags'      => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
            'bold_style'      => '**', // Set to '__' if you prefer the underlined style
            'italic_style'    => '_', // Set to '*' if you prefer the asterisk style
            'remove_nodes'    => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
        );

        $this->environment = Environment::createDefaultEnvironment($defaults);
        $this->environment->getConfig()->merge($options);
    }

    /**
     * @return Environment
     */
    public function getEnvironment()
    {
        return $this->environment;
    }

    /**
     * @return Configuration
     */
    public function getConfig()
    {
        return $this->environment->getConfig();
    }

    /**
     * Convert
     *
     * Lets the converter be called like a function.
     *
     * @see HtmlConverter::convert
     *
     * @param string $html
     *
     * @return string The Markdown version of the html
     */
    public function __invoke($html)
    {
        return $this->convert($html);
    }

    /**
     * Convert
     *
     * Loads the HTML into a DOM document, converts it node by node and
     * returns the sanitized result.
     *
     * @param string $html
     *
     * @return string The Markdown version of the html ('' for blank input)
     *
     * @throws \InvalidArgumentException When the loaded document has no <html> element
     */
    public function convert($html)
    {
        if (trim($html) === '') {
            return '';
        }

        $document = $this->createDOMDocument($html);

        // Work on the entire DOM tree (including head and body)
        if (!($root = $document->getElementsByTagName('html')->item(0))) {
            throw new \InvalidArgumentException('Invalid HTML was provided');
        }

        $rootElement = new Element($root);
        $this->convertChildren($rootElement);

        // Store the now-modified DOMDocument as a string
        $markdown = $document->saveHTML();

        return $this->sanitize($markdown);
    }

    /**
     * Load the HTML into a UTF-8 DOM document.
     *
     * @param string $html
     *
     * @return \DOMDocument
     */
    private function createDOMDocument($html)
    {
        $document = new \DOMDocument();
        $suppressErrors = $this->getConfig()->getOption('suppress_errors');
        $previousSetting = false;

        if ($suppressErrors) {
            // Suppress conversion errors, remembering the caller's setting:
            // this flag is process-wide and must not leak out of the converter.
            $previousSetting = libxml_use_internal_errors(true);
        }

        // Hack to load utf-8 HTML: the pseudo XML declaration makes the parser
        // read the input as UTF-8.
        $document->loadHTML('<?xml encoding="UTF-8">' . $html);
        $document->encoding = 'UTF-8';

        if ($suppressErrors) {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        return $document;
    }

    /**
     * Convert Children
     *
     * Recursive function to drill into the DOM and convert each node into Markdown from the inside out.
     *
     * Finds children of each node and convert those to #text nodes containing their Markdown equivalent,
     * starting with the innermost element and working up to the outermost element.
     *
     * @param ElementInterface $element
     */
    private function convertChildren(ElementInterface $element)
    {
        // Don't convert HTML code inside <code> and <pre> blocks to Markdown - that should stay as HTML
        if ($element->isDescendantOf(array('pre', 'code'))) {
            return;
        }

        // If the node has children, convert those to Markdown first
        if ($element->hasChildren()) {
            foreach ($element->getChildren() as $child) {
                $this->convertChildren($child);
            }
        }

        // Now that child nodes have been converted, convert the original node
        $markdown = $this->convertToMarkdown($element);

        // Create a DOM text node containing the Markdown equivalent of the original node
        // Replace the old $node e.g. '<h3>Title</h3>' with the new $markdown_node e.g. '### Title'
        $element->setFinalMarkdown($markdown);
    }

    /**
     * Convert to Markdown
     *
     * Converts an individual node into a string of its Markdown equivalent.
     *
     * Example: An <h3> node with text content of 'Title' becomes '### Title'
     *
     * @param ElementInterface $element
     *
     * @return string The converted HTML as Markdown; '' for nodes named in 'remove_nodes'
     */
    protected function convertToMarkdown(ElementInterface $element)
    {
        $tag = $element->getTagName();

        // Strip nodes named in remove_nodes. An empty string (not false) keeps
        // the documented string return type; the node still becomes empty text.
        $tags_to_remove = explode(' ', $this->getConfig()->getOption('remove_nodes'));
        if (in_array($tag, $tags_to_remove, true)) {
            return '';
        }

        $converter = $this->environment->getConverterByTag($tag);

        return $converter->convert($element);
    }

    /**
     * Strip what the DOM serialisation adds around the converted text.
     *
     * @param string $markdown
     *
     * @return string
     */
    protected function sanitize($markdown)
    {
        $markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8');
        $markdown = preg_replace('/<!DOCTYPE [^>]+>/', '', $markdown); // Strip doctype declaration
        $markdown = trim($markdown); // Remove blank spaces at the beginning of the html

        /*
         * Removing unwanted tags. Tags should be added to the array in the order they are expected.
         * XML, html and body opening tags should be in that order. Same case with closing tags.
         * Entries without a '/' (including '&#xD;') are only removed from the start of the string.
         */
        $unwanted = array('<?xml encoding="UTF-8">', '<html>', '</html>', '<body>', '</body>', '<head>', '</head>', '&#xD;');

        foreach ($unwanted as $tag) {
            if (strpos($tag, '/') === false) {
                // Opening tags
                if (strpos($markdown, $tag) === 0) {
                    $markdown = substr($markdown, strlen($tag));
                }
            } else {
                // Closing tags: compare the actual suffix. Searching with
                // strpos() finds the FIRST occurrence, so a literal '</body>'
                // inside the text used to prevent the trailing one from being
                // stripped.
                if (substr($markdown, -strlen($tag)) === $tag) {
                    $markdown = substr($markdown, 0, -strlen($tag));
                }
            }
        }

        return trim($markdown, "\n\r\0\x0B");
    }
}