<?php

namespace Drupal\goutte;

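/**
 * Imports the content of a crawled page into a node.
 *
 * Fetches a source URL through GoutteConnector, extracts title, body, images
 * and PDFs with the filters supplied per item, cleans the markup with
 * ContentMassager and hands the result to NodeBuilder.
 */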
class GoutteImporter {

  /**
   * Crawler used to fetch the source page and evaluate the item filters.
   *
   * @var \Drupal\goutte\GoutteConnector
   */
  protected $crawler;

  /**
   * Panther client.
   *
   * Neither initialised nor used in this class; kept because it is part of
   * the protected interface subclasses may rely on.
   *
   * @var \Symfony\Component\Panther\Client
   */
  protected $panther;

  /**
   * Builder that collects field values for the node being imported.
   *
   * @var \Drupal\goutte\NodeBuilder
   */
  protected $node;

  /**
   * Helper that cleans up the crawled body markup.
   *
   * @var \Drupal\goutte\ContentMassager
   */
  protected $massager;

  /**
   * Creates the crawler, node builder and content massager.
   */
  public function __construct() {
    $this->crawler = new GoutteConnector();
    $this->node = new NodeBuilder();
    $this->massager = new ContentMassager();
  }

  /**
   * Crawls one source page and imports it through the node builder.
   *
   * Nothing is imported when the crawler reports no nodes for the URL. When
   * neither title filter yields a title the URL is logged to the
   * 'Goutte Failed' channel and no node fields are set.
   *
   * @param array $item
   *   Import settings for one page. Keys read here: 'url', 'type',
   *   'title_filter', 'secondary_title_filter', 'content_filter',
   *   'image_filter' and 'pdf_filter'. Nodes of type 'blog' additionally
   *   read 'blog_type_filter' and 'published_date_filter'; nodes of type
   *   'news' read 'news_release_type_filter' and 'published_date_filter'.
   */
  public function importContent($item) {
    $this->crawler->connect($item['url']);

    // The crawler object itself is always set, so the only meaningful check
    // is whether the request produced any content.
    if (!$this->crawler->hasNodes()) {
      return;
    }

    $this->node->setUp($item['type']);
    $this->node->setUrl($item['url']);

    $title = $this->crawler->parseXpath($item['title_filter']);
    if (trim($title) === '') {
      // Fall back to the secondary filter and log what it produced.
      $title = $this->crawler->parseXpath($item['secondary_title_filter']);
      \Drupal::logger('Goutte Breadcrumb Current')->notice($title);
    }
    $content = $this->crawler->parseXpath($item['content_filter']);

    if (trim($title) !== '') {
      $this->buildNode($item, $title, $content);
      \Drupal::logger('Goutte Succeed')->notice($item['url']);
    }
    else {
      \Drupal::logger('Goutte Failed')->notice($item['url']);
    }

    $this->crawler->tearDown();
  }

  /**
   * Fills the node builder from the crawled page and finalises the node.
   *
   * @param array $item
   *   Import settings for the page, see importContent().
   * @param string $title
   *   Non-empty title extracted from the page.
   * @param mixed $content
   *   Result of the content filter; treated as the raw body markup.
   */
  private function buildNode($item, $title, $content) {
    // @todo Convert into loop based on UI configuration settings.
    // User interface provides type, filters, and formatter settings.
    // Loop through each URL and apply custom settings.
    $images = $this->crawler->parseXpathSrc($item['image_filter']);
    $pdfs = $this->crawler->parseXpathHref($item['pdf_filter']);

    // Title.
    $this->node->setTitleField('title', $title);

    // Legacy address: the URL relative to the old site root. Only the exact
    // 'http://www.yrdsb.ca/' prefix is stripped.
    $item_url = str_replace('http://www.yrdsb.ca/', '', $item['url']);
    $this->node->setLegacyField('field_legacy_url', '/' . $item_url);

    // Files are uploaded before the body is cleaned because the body clean-up
    // reads the node's internal links afterwards.
    // NOTE(review): presumably uploadFile() is what populates those links -
    // confirm against NodeBuilder.
    if (isset($images)) {
      $this->node->uploadFile($images, 'image');
    }
    if (isset($pdfs)) {
      $this->node->uploadFile($pdfs, 'pdf');
    }

    // Body and type specific fields are only set when there is content.
    // Loose comparison kept so a NULL result is treated like an empty one.
    if ($content != '') {
      $this->node->setBodyField('body', $this->cleanBody($content));
      $this->applyTypeFields($item);
    }

    $this->node->tearDown($item_url);
  }

  /**
   * Runs the crawled markup through the content massager clean-up steps.
   *
   * @param string $body
   *   Raw body markup.
   *
   * @return mixed
   *   The body as returned by the last massager step.
   */
  private function cleanBody($body) {
    $body = $this->massager->cleanHtmlTagsAttributes($body, [
      'style',
      'align',
      'valign',
      'dir',
    ]);
    $body = $this->massager->removeLineBreaks($body);
    $body = $this->massager->removeScripts($body);
    $body = $this->massager->removeTitle($body);
    $body = $this->massager->removeDivIded($body, 'ctl00_PlaceHolderMain_ctl02_label');

    // Push every heading one level down. Starting at h5 keeps a heading from
    // being moved twice (h1 -> h2 -> h3).
    foreach ([5, 4, 3, 2, 1] as $level) {
      $body = $this->massager->replaceTagtoTag($body, 'h' . $level, 'h' . ($level + 1));
    }
    $body = $this->massager->replaceTagtoTag($body, 'div', 'p');

    $body = $this->massager->replaceBtags($body);
    $body = $this->massager->removeTargetBlank($body);
    $body = $this->massager->replaceNbsp($body);
    $body = $this->massager->removeAbsoluteUrls($body);
    $body = $this->massager->removeShareButtons($body);

    // Each internal link is a pair passed to replaceFilePath() as its second
    // and third argument.
    $internal_links = $this->node->getInternalLinks();
    if (!empty($internal_links)) {
      foreach ($internal_links as $link) {
        $body = $this->massager->replaceFilePath($body, $link[0], $link[1]);
      }
    }

    // Classes are stripped only after the steps above have run.
    $body = $this->massager->cleanHtmlTagsAttributes($body, ['class']);
    return $this->massager->removeEmptyParagraphs($body);
  }

  /**
   * Sets the taxonomy and date fields specific to the node type.
   *
   * Only the 'blog' and 'news' types have extra fields; any other type is
   * left untouched.
   *
   * @param array $item
   *   Import settings for the page, see importContent().
   */
  private function applyTypeFields($item) {
    switch ($this->node->getType()) {
      case 'blog':
        $blog_type = $this->crawler->parseXpath($item['blog_type_filter']);
        // Always hand over an array, even when the filter matched nothing,
        // instead of an undefined variable.
        $blog_release_type = $blog_type != '' ? [$blog_type] : [];

        // NOTE(review): unlike the 'news' case the filter value itself is
        // used as the date here instead of being evaluated by the crawler -
        // confirm this is intended.
        $published_date = $item['published_date_filter'];

        // Tags.
        $this->node->setTaxonomyField('field_blog_type', 'news', $blog_release_type);

        // Date.
        $this->setNodeDates($published_date);
        break;

      case 'news':
        $news_type = $this->crawler->parseXpath($item['news_release_type_filter']);
        if ($news_type != '') {
          $news_release_type = [$news_type];
        }
        else {
          // Fall back to a generic type and record the page for follow-up.
          \Drupal::logger('News without type')->notice($item['url']);
          $news_release_type = ['News release'];
        }

        $published_date = $this->crawler->parseXpath($item['published_date_filter']);
        if ($published_date == '') {
          // Fixed placeholder date for pages without a published date.
          $published_date = '1/1/2018';
          \Drupal::logger('News without date')->notice($item['url']);
        }

        // Tags.
        $this->node->setTaxonomyField('field_news_type', 'news', $news_release_type);

        // Date.
        $this->setNodeDates($published_date);
        break;
    }
  }

  /**
   * Sets the created and changed dates of the node and logs the value.
   *
   * @param mixed $published_date
   *   Date value handed to NodeBuilder::setDateField() with the 'UNIX' format
   *   argument.
   */
  private function setNodeDates($published_date) {
    $this->node->setDateField('created', $published_date, 'UNIX');
    $this->node->setDateField('changed', $published_date, 'UNIX');
    \Drupal::logger('Date')->notice($published_date);
  }

}
