| 
<?php
 /**
 * Transform the awful HTML of Flash into standardized HTML.
 */
 class Flash2HTML {

     /** @var mixed XML parser handle, only set while html() is running. */
     private $parser;

     /** @var array Stack of entity descriptions for the registered tags that are currently open. */
     private $nodes = array();

     /**
      * @var array Map of source tag name (upper case, as delivered by the
      *            case-folding parser) => entity description with the keys
      *            "tag", "attributes", "closed", "start_tag_handler",
      *            "data_tag_handler" and "end_tag_handler".
      */
     private $entities = array();

     /** @var string|NULL Output accumulated by the parser callbacks. */
     private $content = NULL;

     /** @var callable|NULL Optional user callback replacing the default character data processing. */
     private $dataHandler = NULL;

     /** @var bool When TRUE, markup is dropped and only the text is returned (one line per P). */
     public $plainText = FALSE;

     /** @var bool When TRUE, every character of a mailto: address is written as a hexadecimal entity. */
     public $protectEmail = FALSE;

     /**
      * Registers the default tag mappings and handlers.
      *
      * Uses __construct() rather than a method named after the class: PHP 8
      * no longer treats the latter as a constructor, which would leave the
      * converter without any registered tag.
      */
     public function __construct() {
         $this->setTagTransformation('B', 'strong');
         $this->setTagTransformation('I', 'em');
         $this->setTagTransformation('LI', 'li');

         $this->setStartTagHandler('A', array($this, "a_startTagHandler"));
         $this->setStartTagHandler('FONT', array($this, "font_startTagHandler"));
         $this->setStartTagHandler('IMG', array($this, "img_startTagHandler"));
         $this->setStartTagHandler("P", array($this, "p_startTagHandler"));
         $this->setEndTagHandler("P", array($this, "p_endTagHandler"));
         $this->setStartTagHandler('TEXTFORMAT', array($this, "textformat_startTagHandler"));
         $this->setStartTagHandler('U', array($this, "u_startTagHandler"));
     }

     /**
      * Shortcut for html(), so that an instance can be used as a function.
      *
      * @param String $data
      * @param Array $properties
      * @return String
      */
     public function __invoke($data, $properties = array()) {
         return $this->html($data, $properties);
     }

     /**
      * Unregisters a tag: its markup is dropped, its text content is kept.
      *
      * @param String $tag Source tag name in upper case.
      */
     public function ignoreTag($tag) {
         unset($this->entities[$tag]);
     }

     /**
      * Maps a source tag to an output tag. Any handler previously registered
      * for $from_tag is reset. A $to_tag matching neither accepted form is
      * ignored.
      *
      * @param String $from_tag Source tag name in upper case.
      * @param String $to_tag Either a bare tag name ("strong") or a complete
      *                       opening tag with attributes ('<br class="x" />').
      */
     public function setTagTransformation($from_tag, $to_tag) {
         if (preg_match("/^\w+$/", $to_tag, $matches)) {
             $this->entities[$from_tag] = $this->createEntity($matches[0], NULL, FALSE);
         } elseif (preg_match("/^<(\w+)\s+(.*)(\/?)>$/U", $to_tag, $matches)) {
             $this->entities[$from_tag] = $this->createEntity($matches[1], $matches[2], $matches[3] == "/");
         }
     }

     /**
      * Sets the callback producing the opening markup of a tag. The tag is
      * registered first (mapped to its lower-case name) when it is unknown.
      *
      * @param String $tag Source tag name in upper case.
      * @param Function $start_tag_handler Called with ($parser, $tag, $attrs); returns the markup to emit.
      */
     public function setStartTagHandler($tag, $start_tag_handler) {
         $this->ensureEntity($tag);
         $this->entities[$tag]["start_tag_handler"] = $start_tag_handler;
     }

     /**
      * Sets a callback that replaces the default processing of character data.
      *
      * @param Function $data_handler Called with ($parser, $cdata), where
      *                               $cdata is the decoded text; its return
      *                               value is emitted as is, so it is
      *                               responsible for any escaping.
      */
     public function setDataHandler($data_handler) {
         $this->dataHandler = $data_handler;
     }

     /**
      * Sets the callback producing the closing markup of a tag. The tag is
      * registered first (mapped to its lower-case name) when it is unknown.
      *
      * @param String $tag Source tag name in upper case.
      * @param Function $end_tag_handler Called with ($parser, $tag); returns the markup to emit.
      */
     public function setEndTagHandler($tag, $end_tag_handler) {
         $this->ensureEntity($tag);
         $this->entities[$tag]["end_tag_handler"] = $end_tag_handler;
     }

     /**
      * Removes the start handler of a registered tag; unknown tags are left alone.
      *
      * @param String $tag
      */
     public function removeStartTagHandler($tag) {
         // Writing to an unknown tag would create an incomplete entity.
         if (array_key_exists($tag, $this->entities)) {
             $this->entities[$tag]["start_tag_handler"] = NULL;
         }
     }

     /**
      * Clears the "data_tag_handler" slot of a registered tag; unknown tags are left alone.
      *
      * @param String $tag
      */
     public function removeDataTagHandler($tag) {
         if (array_key_exists($tag, $this->entities)) {
             $this->entities[$tag]["data_tag_handler"] = NULL;
         }
     }

     /**
      * Removes the end handler of a registered tag; unknown tags are left alone.
      *
      * @param String $tag
      */
     public function removeEndTagHandler($tag) {
         if (array_key_exists($tag, $this->entities)) {
             $this->entities[$tag]["end_tag_handler"] = NULL;
         }
     }

     /**
      * Builds a complete entity description without handlers.
      *
      * @param String $tag Output tag name.
      * @param String|NULL $attributes Raw attribute string of the output tag.
      * @param bool $closed TRUE when the output tag is self-closing.
      * @return Array
      */
     private function createEntity($tag, $attributes, $closed) {
         return array(
             "tag" => $tag,
             "attributes" => $attributes,
             "closed" => $closed,
             "start_tag_handler" => NULL,
             "data_tag_handler" => NULL,
             "end_tag_handler" => NULL,
         );
     }

     /**
      * Registers $tag with its lower-case name when it is not known yet, so
      * that a handler is never attached to an incomplete entity.
      *
      * @param String $tag
      */
     private function ensureEntity($tag) {
         if (!array_key_exists($tag, $this->entities)) {
             $this->entities[$tag] = $this->createEntity(strtolower($tag), NULL, FALSE);
         }
     }

     /** A paragraph opens nothing: it is rendered as a line break on closing. */
     private function p_startTagHandler($parser, $tag, $attrs) {
         return NULL;
     }

     /** A closing paragraph becomes a line break. */
     private function p_endTagHandler($parser, $tag) {
         return "<br />\n";
     }

     /**
      * Converts Flash HTML into HTML.
      *
      * Non-ASCII characters are returned as hexadecimal character references.
      * Input that is not valid UTF-8 yields an empty result. Parsing is best
      * effort: when the markup is not well-formed, what was converted before
      * the error is returned.
      *
      * @param String $data Flash HTML, UTF-8 encoded.
      * @param Array $properties Property name => value pairs (e.g. plainText,
      *                          protectEmail) applied for this call only.
      * @return String
      */
     public function html($data, $properties = array()) {
         // Non-ASCII characters travel through the XML parser as ASCII
         // placeholders and are turned into character references at the end.
         $uniqid = uniqid("unicode_");
         $data = $this->maskNonAscii((string) $data, $uniqid);
         if ($data === NULL) {
             $data = '';
         }

         $original_properties = array();
         foreach ($properties as $key => $value) {
             $original_properties[$key] = $this->$key;
             $this->$key = $value;
         }

         $data = $this->prepare($data);

         // Start from a clean state, whatever a previous call left behind.
         $this->nodes = array();
         $this->content = '';

         $this->parser = xml_parser_create();
         xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, TRUE);
         // Array callables instead of xml_set_object(), deprecated in PHP 8.4.
         xml_set_element_handler($this->parser, array($this, "startTagHandler"), array($this, "endTagHandler"));
         xml_set_character_data_handler($this->parser, array($this, "dataHandler"));
         xml_parse($this->parser, $data, TRUE);
         if (PHP_VERSION_ID < 80000) {
             // Parsers are resources before PHP 8.0; freeing is a no-op afterwards.
             xml_parser_free($this->parser);
         }
         $this->parser = NULL;

         $ret = (string) $this->content;
         $this->content = NULL;
         $this->nodes = array();

         // Characters decoded by the parser from numeric references
         // (e.g. "&#8211;") get the same treatment as the original text.
         $masked = $this->maskNonAscii($ret, $uniqid);
         if ($masked !== NULL) {
             $ret = $masked;
         }

         if ($this->protectEmail) {
             $ret = preg_replace_callback("/mailto:(.*)\"/U", array($this, "protectEmailCallback"), $ret);
         }

         // restore original properties
         foreach ($original_properties as $key => $value) {
             $this->$key = $value;
         }

         $ret = preg_replace_callback('/' . preg_quote($uniqid, '/') . '([0-9a-f]{6})/', array($this, "restoreCodePointCallback"), $ret);
         // Drop the line break of the last paragraph.
         $ret = preg_replace(array("/<br \/>\n$/", "/<br \/>\n<\/div>$/"), array('', "</div>"), $ret);
         $ret = preg_replace("/<span[^>]*><\/span>/U", '', $ret);
         // Links are underlined by default: the explicit underline is redundant.
         $ret = preg_replace("/<a([^>]*)><span style=\"text-decoration: underline; \">(.*)<\/span><\/a>/U", "<a\$1>\$2</a>", $ret);
         // Flash has no list container: wrap each line of items into a list.
         $ret = preg_replace("/<li>(.*)<\/li>/", "<ul><li>$1</li>\n</ul>\n", $ret);
         $ret = preg_replace("/<li>/", "\n\t<li>", $ret);
         $ret = preg_replace("/<br \/>\n<ul>/", "<ul>", $ret);
         $ret = preg_replace("/<\/ul>\n<br \/>/", "</ul>", $ret);

         return $ret;
     }

     /**
      * Makes the Flash markup parsable as XML and wraps it into a root element.
      *
      * @param String $data
      * @return String
      */
     private function prepare($data) {
         if (!$this->plainText) {
             $data = preg_replace_callback("/HREF=\"(.*)\"/U", array($this, "fixHREFCallback"), $data);
             // Self-close the images; an image that already is self-closed is left equivalent.
             $data = preg_replace('/<IMG\b([^>]*?)\s*\/?>/', '<IMG$1 />', $data);
         }
         return "<root>$data</root>";
     }

     /**
      * Replaces every non-ASCII character by $marker followed by its code
      * point written with six lower-case hexadecimal digits.
      *
      * @param String $text UTF-8 text.
      * @param String $marker
      * @return String|NULL NULL when $text is not valid UTF-8.
      */
     private function maskNonAscii($text, $marker) {
         $parts = preg_split('/([^\x00-\x7F])/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
         if ($parts === FALSE) {
             return NULL;
         }
         $masked = '';
         foreach ($parts as $index => $part) {
             // Odd offsets hold the captured non-ASCII characters.
             $masked .= ($index % 2 === 1) ? $marker . sprintf('%06x', $this->codePoint($part)) : $part;
         }
         return $masked;
     }

     /**
      * Returns the code point of a single, valid UTF-8 encoded character.
      *
      * @param String $char
      * @return int
      */
     private function codePoint($char) {
         $length = strlen($char);
         $code = ord($char[0]);
         if ($length > 1) {
             // The lead byte of an n byte sequence carries 7 - n payload bits.
             $code &= 0xFF >> ($length + 1);
             for ($i = 1; $i < $length; $i++) {
                 $code = ($code << 6) | (ord($char[$i]) & 0x3F);
             }
         }
         return $code;
     }

     /** Turns a masked code point into a hexadecimal character reference. */
     private function restoreCodePointCallback($matches) {
         return '&#x' . sprintf('%04x', hexdec($matches[1])) . ';';
     }

     /**
      * Escapes a value for use inside a double-quoted HTML attribute.
      *
      * @param String $value
      * @return String
      */
     private function escapeAttribute($value) {
         return htmlspecialchars((string) $value, ENT_COMPAT, 'UTF-8');
     }

     /**
      * Parser callback: emits the opening markup of a registered tag and
      * pushes its entity on the stack of open nodes.
      */
     private function startTagHandler($parser, $tag, $attrs) {
         if ($this->plainText || !array_key_exists($tag, $this->entities)) {
             return;
         }

         $entity = $this->entities[$tag];
         if ($entity["start_tag_handler"] !== NULL) {
             $str = (string) call_user_func($entity["start_tag_handler"], $parser, $tag, $attrs);
             if (preg_match("/^<(\w+)/", $str, $matches)) {
                 // The handler decides which tag has to be closed later on.
                 $entity["tag"] = $matches[1];
                 $entity["closed"] = preg_match("/.*\/>/", $str) === 1;
             } elseif ($entity["end_tag_handler"] === NULL) {
                 // Nothing was opened and nobody renders the end: there is
                 // nothing to close either.
                 $entity["closed"] = TRUE;
             }
             $this->content .= $str;
         } else {
             $this->content .= "<$entity[tag]";
             if ($entity["attributes"] !== NULL && $entity["attributes"] !== '') {
                 $this->content .= " $entity[attributes]";
             }
             $this->content .= $entity["closed"] ? " />" : ">";
         }

         array_push($this->nodes, $entity);
     }

     /**
      * Parser callback: emits character data. The text is received decoded
      * from the parser, so it is escaped again unless a custom data handler
      * takes over.
      */
     private function dataHandler($parser, $cdata) {
         if ($this->plainText) {
             $this->content .= $cdata;
             return;
         }

         if ($this->dataHandler !== NULL) {
             $str = call_user_func($this->dataHandler, $parser, $cdata);
         } else {
             $str = preg_replace_callback("/\s{2,}/", array($this, "replaceSpacesCallback"), $cdata);
             $str = htmlspecialchars($str, ENT_NOQUOTES, 'UTF-8');
         }

         $this->content .= $str;
     }

     /**
      * Parser callback: emits the closing markup of the registered tag that
      * was opened last.
      */
     private function endTagHandler($parser, $tag) {
         if (!array_key_exists($tag, $this->entities)) {
             return;
         }

         if ($this->plainText) {
             if ($tag == "P") {
                 $this->content .= "\n";
             }
             return;
         }

         $entity = array_pop($this->nodes);
         if ($entity === NULL || $entity["closed"]) {
             return;
         }

         if ($entity["end_tag_handler"] !== NULL) {
             $this->content .= call_user_func($entity["end_tag_handler"], $parser, $tag);
         } else {
             $this->content .= "</$entity[tag]>";
         }
     }

     /** Renders an anchor with its href and, when given, its target. */
     private function a_startTagHandler($parser, $tag, $attrs) {
         $href = isset($attrs["HREF"]) ? $attrs["HREF"] : '';
         $ret = '<a href="' . $this->escapeAttribute($href) . '"';
         if (isset($attrs["TARGET"]) && strlen($attrs["TARGET"]) > 0) {
             $ret .= ' target="' . $this->escapeAttribute($attrs["TARGET"]) . '"';
         }
         return $ret . '>';
     }

     /** Renders FONT as a span carrying the equivalent inline style. */
     private function font_startTagHandler($parser, $tag, $attrs) {
         $ret = '<span style="';
         if (array_key_exists("FACE", $attrs)) {
             $ret .= "font-family: '" . $this->escapeAttribute($attrs["FACE"]) . "'; ";
         }
         if (array_key_exists("SIZE", $attrs)) {
             $ret .= "font-size: " . $this->escapeAttribute($attrs["SIZE"]) . "px; ";
         }
         if (array_key_exists("COLOR", $attrs)) {
             $ret .= "color: " . $this->escapeAttribute($attrs["COLOR"]) . "; ";
         }
         if (array_key_exists("LEADING", $attrs)) {
             $ret .= "line-height: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
         }
         if (array_key_exists("LETTERSPACING", $attrs)) {
             $ret .= "letter-spacing: " . $this->escapeAttribute($attrs["LETTERSPACING"]) . "px; ";
         }
         return $ret . '">';
     }

     /** Renders a floating image: to the right for ALIGN="right", to the left otherwise. */
     private function img_startTagHandler($parser, $tag, $attrs) {
         $align = isset($attrs["ALIGN"]) ? $attrs["ALIGN"] : NULL;
         $src = isset($attrs["SRC"]) ? $attrs["SRC"] : '';
         $style = $align == "right" ? "float: right; margin-left: 10px; " : "float: left; margin-right: 10px; ";
         return "<img style=\"$style\" src=\"" . $this->escapeAttribute($src) . "\" alt=\"\" />";
     }

     /**
      * Renders TEXTFORMAT as a div when it carries BLOCKINDENT or LEADING;
      * returns NULL (no markup) otherwise.
      */
     private function textformat_startTagHandler($parser, $tag, $attrs) {
         $has_indent = array_key_exists("BLOCKINDENT", $attrs);
         $has_leading = array_key_exists("LEADING", $attrs);
         if (!$has_indent && !$has_leading) {
             return NULL;
         }

         $ret = '<div style="';
         if ($has_indent) {
             $ret .= "margin-left: " . $this->escapeAttribute($attrs["BLOCKINDENT"]) . "px; ";
         }
         if ($has_leading) {
             $ret .= "margin-bottom: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
         }
         return $ret . '">';
     }

     /** Renders U as an underlined span. */
     private function u_startTagHandler($parser, $tag, $attrs) {
         return '<span style="text-decoration: underline; ">';
     }

     /** Replaces a run of whitespace by the same number of spaces. */
     private function replaceSpacesCallback($matches) {
         return str_repeat(" ", strlen($matches[0]));
     }

     /** Writes every character of a mailto: address as a hexadecimal entity. */
     private function protectEmailCallback($matches) {
         $ret = '';
         $str = $matches[1];
         $len = strlen($str);
         for ($i = 0; $i < $len; $i++) {
             $ret .= "&#x" . strtoupper(dechex(ord($str[$i]))) . ";";
         }
         return "mailto:$ret\"";
     }

     /** Escapes the value of an HREF attribute so that the markup is parsable as XML. */
     private function fixHREFCallback($matches) {
         return 'HREF="' . htmlentities($matches[1]) . '"';
     }

 }
 
 ?>
 |