//============================================================================
// Name        : cxxParser.cpp
// Author      : nick
// Version     :
// Copyright   : Your copyright notice
// Description : Inserts, removes or replaces an ad snippet after the <head> tag of HTML files in a directory tree
//============================================================================
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <cstdlib>
#include <vector>
#include <htmlcxx/html/ParserDom.h>
#include <sys/dir.h>
using namespace std;
using namespace htmlcxx::HTML;

// Reads the entire file named strFile into strContent.
//
// strFile    - path of the file to read.
// strContent - receives the file's contents; left untouched when the
//              stream is not in a good state right after opening.
// Returns true when the stream was good after opening, false otherwise.
bool getString(const string& strFile, string& strContent)
{
    ifstream input(strFile.c_str(), ifstream::in);
    if (!input.good())
    {
        return false;
    }

    // Drain the whole stream buffer in one operation.
    stringstream buffer;
    buffer << input.rdbuf();
    strContent = buffer.str();
    return true;
}

// Overwrites the file named strFile with strContent, truncating whatever
// the file held before.
//
// strFile    - path of the file to (re)write.
// strContent - bytes to store in the file.
// Returns true only when the file could be opened AND the stream is still
// good after the data has been flushed.  The explicit flush matters because
// ofstream buffers its output: without it a failed write would only be
// noticed in the destructor, after success had already been reported.
bool setString(const string& strFile, const string& strContent)
{
    ofstream ofs(strFile.c_str(), ofstream::out | ofstream::trunc);
    if (!ofs.good())
    {
        return false;
    }

    ofs << strContent;
    ofs.flush();
    return ofs.good();
}

// Case-insensitively searches strTag for strTagName immediately followed by
// one space character.
//
// The candidate "<strTagName> " is tried at every index of strTag that holds
// a character other than space, tab or newline, so the match is not anchored
// to the beginning of strTag.  Because the trailing space is part of the
// candidate, a strTag that consists of the bare name only does NOT match.
//
// Returns true at the first index that matches, false when none does.
bool myTagCompare(const string& strTag, const string& strTagName)
{
    const string strTarget = strTagName + " ";
    const char* const pTag = strTag.c_str();
    for (size_t i = 0; i < strTag.size(); ++i)
    {
        const char ch = pTag[i];
        if (ch == ' ' || ch == '\t' || ch == '\n')
        {
            continue;
        }
        // Compare in place: strncasecmp stops at the terminating NUL, so no
        // temporary substring has to be allocated for every position.
        if (strncasecmp(pTag + i, strTarget.c_str(), strTarget.size()) == 0)
        {
            return true;
        }
    }
    return false;
}
// Finds the first tag named pTag in strContent and reports the offset just
// past the '>' that follows the tag's start.
//
// strContent - HTML text to parse.
// start      - on success, set to the index right after the first '>' found
//              at or after the tag's offset; left untouched on failure.
// pTag       - tag name to look for, compared case-insensitively;
//              defaults to "head".
// Returns true when a matching tag and a following '>' were found.
bool findHeadTag(const string& strContent, size_t& start, const char* pTag="head");

bool findHeadTag(const string& strContent, size_t& start, const char* pTag)
{
    ParserDom dom;
    const tree<Node>& tr = dom.parseTree(strContent);
    for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); ++it)
    {
        // Plain case-insensitive equality on the tag name (the same test
        // findAllTags uses).  A negated comparison here would accept the
        // first tag that is NOT pTag, i.e. usually the first tag of the file.
        if (it->isTag() && strcasecmp(it->tagName().c_str(), pTag) == 0)
        {
            size_t pos = strContent.find_first_of('>', it->offset());
            if (pos != string::npos)
            {
                start = pos+1;
                return true;
            }
        }
    }
    return false;
}


// Inserts strAds at the position findHeadTag() reports for the file strFile,
// unless the text at that position already equals strAds.
//
// Returns the result of setString() when the snippet was inserted; false
// when the file could not be read, findHeadTag() failed, or the snippet was
// already present.
bool addAds(const string& strFile, const string& strAds)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    size_t start = 0;
    if (!findHeadTag(strContent, start))
    {
        return false;
    }

    // Already there: nothing to do, and the file is not rewritten.
    if (strContent.compare(start, strAds.size(), strAds) == 0)
    {
        return false;
    }

    strContent.insert(start, strAds);
    return setString(strFile, strContent);
}

// Removes strAds from the file strFile when it sits exactly at the position
// findHeadTag() reports.
//
// Returns the result of setString() when the snippet was removed; false when
// the file could not be read, findHeadTag() failed, or the text at that
// position is not strAds.
bool removeAds(const string& strFile, const string& strAds)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    size_t start = 0;
    if (!findHeadTag(strContent, start))
    {
        return false;
    }

    if (strContent.compare(start, strAds.size(), strAds) != 0)
    {
        return false;
    }

    strContent.erase(start, strAds.size());
    return setString(strFile, strContent);
}

// Swaps strOldAds for strNewAds in the file strFile when strOldAds sits
// exactly at the position findHeadTag() reports.
//
// Returns the result of setString() when the swap was made; false when the
// file could not be read, findHeadTag() failed, or the text at that position
// is not strOldAds.
bool replaceAds(const string& strFile, const string& strOldAds, const string& strNewAds)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    size_t start = 0;
    if (!findHeadTag(strContent, start))
    {
        return false;
    }

    if (strContent.compare(start, strOldAds.size(), strOldAds) != 0)
    {
        return false;
    }

    // One replace() does the work of erasing the old text and inserting the new.
    strContent.replace(start, strOldAds.size(), strNewAds);
    return setString(strFile, strContent);
}

// Tells whether strName carries an "html" or "htm" extension, ignoring case.
// The extension is whatever follows the last '.' in strName; a name without
// any '.' is never treated as HTML.
bool isHtml(const string& strName)
{
    const size_t dot = strName.rfind('.');
    if (dot == string::npos)
    {
        return false;
    }

    const char* ext = strName.c_str() + dot + 1;
    return strcasecmp(ext, "html") == 0 || strcasecmp(ext, "htm") == 0;
}
// Returns true when strDir is exactly "personal" or "generatedTrees"
// (case-sensitive); the directory walkers use this to skip such directories.
bool isForbidden(const string& strDir)
{
    return strDir == "personal" || strDir == "generatedTrees";
}

// Recursively appends to vect the path of every HTML file below strPath.
//
// strPath - directory to scan; child paths are built as strPath + "/" + name.
// vect    - receives the paths of regular files accepted by isHtml().
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden().  Entries whose d_type is neither DT_DIR nor DT_REG
// (DT_UNKNOWN included) are ignored.
//
// Returns false when strPath, or any directory below it, cannot be opened
// (the failing path is reported through perror); true otherwise.  The
// directory handle is closed on every exit path, including the early exit
// taken when a nested scan fails.
bool doGetAllFiles(const string& strPath, vector<string>& vect)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror (strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    while (ok && (ent = readdir(dir)) != NULL)
    {
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                // A failure further down stops the loop; dir is still closed below.
                ok = doGetAllFiles(strPath+"/"+ ent->d_name, vect);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                vect.push_back(strPath+"/"+ent->d_name);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}

// Prints strFileName to stdout when the file can be read but findHeadTag()
// fails on its content.  Unreadable files are silently skipped.
//
// Always returns true, so a directory walk using this as its callback is
// never aborted by it.
bool doFindHeadTag(const string& strFileName)
{
    string strContent;
    size_t start = 0;
    if (getString(strFileName, strContent) && !findHeadTag(strContent, start))
    {
        cout << strFileName << endl;
    }
    return true;
}

// Inserts strAds into the file strFileName, creating a <head> element when
// findHeadTag() cannot locate one.
//
// - head tag found: strAds is inserted at the reported position, unless the
//   text there already equals strAds (then nothing is written).
// - no head tag but an "html" tag found: "<head>" + strAds + "</head>" is
//   inserted at the position reported for the html tag.
// - neither found: nothing is written.
//
// Returns the result of setString() when something was inserted, false in
// every other case (including an unreadable file).
bool doAddHeadTag(const string& strFileName, const string& strAds)
{
    string strContent;
    if (!getString(strFileName, strContent))
    {
        return false;
    }

    size_t start = 0;
    string strInsert;
    if (findHeadTag(strContent, start))
    {
        if (strContent.compare(start, strAds.size(), strAds) == 0)
        {
            return false;
        }
        strInsert = strAds;
    }
    else if (findHeadTag(strContent, start, "html"))
    {
        strInsert = "<head>";
        strInsert += strAds;
        strInsert += "</head>";
    }
    else
    {
        return false;
    }

    strContent.insert(start, strInsert);
    return setString(strFileName, strContent);
}

// Callback invoked with the path of each HTML file found by the walker
// below; returning false stops the walk.
typedef bool(*CallbackType)(const string&);

// Recursively walks strPath and calls cb for every HTML file found.
//
// strPath - directory to scan; child paths are built as strPath + "/" + name.
// cb      - called with the path of each regular file accepted by isHtml().
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden().  Entries whose d_type is neither DT_DIR nor DT_REG
// (DT_UNKNOWN included) are ignored.
//
// Returns false when a directory cannot be opened (reported through perror),
// when a nested walk fails, or when cb returns false; true otherwise.  The
// directory handle is closed on every exit path, including those early exits.
bool doGetAllFiles(const string& strPath, CallbackType cb)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror (strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    while (ok && (ent = readdir(dir)) != NULL)
    {
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                ok = doGetAllFiles(strPath+"/"+ ent->d_name, cb);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                // A false result from the callback ends the walk.
                ok = cb(strPath+"/"+ent->d_name);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}

// Resolves strPath with realpath() and collects the HTML files below the
// resolved directory into vect via doGetAllFiles().
//
// Returns false when realpath() fails, otherwise whatever doGetAllFiles()
// returns.
bool getAllFiles(const string& strPath, vector<string>& vect)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doGetAllFiles(resolved, vect);
}

// Prints every <script> tag of the file strFile whose text contains
// "google", as a line of the form "filename:<path> text:<tag text>".
//
// The tag name is compared case-insensitively so that <SCRIPT> is found as
// well, matching how the other tag lookups in this file compare names.
// Files that cannot be read are skipped silently.
//
// Always returns false, whether or not anything was printed.
bool doSearchAds(const string& strFile)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    ParserDom dom;
    const tree<Node>& tr = dom.parseTree(strContent);
    for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); ++it)
    {
        if (!it->isTag() || strcasecmp(it->tagName().c_str(), "script") != 0)
        {
            continue;
        }
        if (it->text().find("google") != string::npos)
        {
            cout << "filename:"<< strFile << " text:" << it->text() << endl;
        }
    }
    return false;
}

// Recursively walks strPath and runs doSearchAds() on every HTML file found.
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden().  Entries whose d_type is neither DT_DIR nor DT_REG
// (DT_UNKNOWN included) are ignored.  The result of doSearchAds() is not
// inspected.
//
// Returns false when strPath, or any directory below it, cannot be opened
// (reported through perror); true otherwise.  The directory handle is closed
// on every exit path, including the early exit after a failed nested walk.
bool doSearchAllFiles(const string& strPath)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror (strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    while (ok && (ent = readdir(dir)) != NULL)
    {
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                ok = doSearchAllFiles(strPath+"/" + ent->d_name);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                doSearchAds(strPath+"/" + ent->d_name);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}

// Resolves strPath with realpath() and runs doSearchAllFiles() on the
// resolved directory.
//
// Returns false when realpath() fails, otherwise whatever
// doSearchAllFiles() returns.
bool searchAds(const string& strPath)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doSearchAllFiles(resolved);
}

// Lists every HTML file getAllFiles() finds below strPath, one path per
// line, followed by a "total file number: N" summary line.
//
// Returns false (printing nothing) when getAllFiles() fails, true otherwise.
bool test2(const string& strPath)
{
    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return false;
    }

    for (vector<string>::const_iterator it = files.begin(); it != files.end(); ++it)
    {
        cout << *it << endl;
    }
    cout << "total file number: " << files.size() << endl;
    return true;
}



// Round-trip check on a single file: adds the snippet stored in
// /home/nick/googleads.txt to strFileName, removes it again, and verifies
// that the file content ends up identical to what it was before.
//
// Returns true (after printing "content remain unchanged") only when every
// step succeeds and the content matches; false as soon as any step fails.
bool test1(const string& strFileName)
{
    const string strAdsFile="/home/nick/googleads.txt";

    string strAds;
    if (!getString(strAdsFile, strAds))
    {
        return false;
    }

    string strOldContent;
    if (!getString(strFileName, strOldContent))
    {
        return false;
    }

    if (!addAds(strFileName, strAds))
    {
        return false;
    }
    if (!removeAds(strFileName, strAds))
    {
        return false;
    }

    string strNewContent;
    if (!getString(strFileName, strNewContent))
    {
        return false;
    }
    if (strOldContent != strNewContent)
    {
        return false;
    }

    cout << "content remain unchanged" << endl;
    return true;
}

// Add-then-remove round trip on strFileName using the snippet stored in
// /home/nick/googleads.txt; succeeds when the file content is the same
// before and after.
//
// The steps run strictly in order and stop at the first failure.  Returns
// true (after printing "content remain unchanged") when all of them succeed
// and the content matches, false otherwise.
bool test3(const string& strFileName)
{
    const string strAdsFile="/home/nick/googleads.txt";
    string strAds;
    string strBefore, strAfter;

    // && short-circuits, so a failed step prevents all later ones from running.
    const bool roundTripOk =
        getString(strAdsFile, strAds)
        && getString(strFileName, strBefore)
        && addAds(strFileName, strAds)
        && removeAds(strFileName, strAds)
        && getString(strFileName, strAfter)
        && strBefore == strAfter;

    if (roundTripOk)
    {
        cout << "content remain unchanged" << endl;
    }
    return roundTripOk;
}


// Resolves strPath with realpath() and walks it with doFindHeadTag as the
// per-file callback.
//
// Returns false when realpath() fails, otherwise the result of the walk.
bool test4(const string& strPath)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doGetAllFiles(resolved, doFindHeadTag);
}

// Prints the source text of every tag named pTag (compared
// case-insensitively) found in the file strFile, one tag per line.
//
// strFile - path of the HTML file to scan.
// pTag    - tag name to look for.
// Returns true when at least one matching tag was printed; false when none
// matched or the file could not be read.  test5 relies on this result to
// decide whether to list the file.
bool findAllTags(const string& strFile, const char* pTag)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    bool found = false;
    ParserDom dom;
    const tree<Node>& tr = dom.parseTree(strContent);
    for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); ++it)
    {
        if (it->isTag() && strcasecmp(it->tagName().c_str(),pTag) == 0)
        {
            cout << strContent.substr(it->offset(), it->length()) << endl;
            found = true;
        }
    }
    return found;
}

// Runs findAllTags(file, "a") on every HTML file getAllFiles() finds below
// strPath, printing the file's path whenever findAllTags() returns true, and
// finishes with a "total file number: N" summary line.
//
// Returns false (printing nothing) when getAllFiles() fails, true otherwise.
bool test5(const string& strPath)
{
    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return false;
    }

    for (vector<string>::const_iterator it = files.begin(); it != files.end(); ++it)
    {
        if (findAllTags(*it, "a"))
        {
            cout << *it << endl;
        }
    }
    cout << "total file number: " << files.size() << endl;
    return true;
}
// Applies addAds() to every HTML file below strPath, using the snippet read
// from strAdsFile, and prints the path of each file that was changed.
//
// Returns the number of files for which addAds() succeeded; 0 when the
// snippet file cannot be read or the directory scan fails.
int myAddAds(const string& strPath, const string& strAdsFile)
{
    string strAds;
    vector<string> files;
    if (!getString(strAdsFile, strAds) || !getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator it = files.begin(); it != files.end(); ++it)
    {
        if (addAds(*it, strAds))
        {
            cout << *it << endl;
            ++changed;
        }
    }
    return changed;
}

// Applies doAddHeadTag() to every HTML file below strPath, using the snippet
// read from strAdsFile, and prints the path of each file that was changed.
//
// Returns the number of files for which doAddHeadTag() succeeded; 0 when the
// snippet file cannot be read or the directory scan fails.
int myAddAdsByAddingHead(const string& strPath, const string& strAdsFile)
{
    string strAds;
    vector<string> files;
    if (!getString(strAdsFile, strAds) || !getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator it = files.begin(); it != files.end(); ++it)
    {
        if (doAddHeadTag(*it, strAds))
        {
            cout << *it << endl;
            ++changed;
        }
    }
    return changed;
}

// Applies replaceAds() to every HTML file below strPath, swapping the
// snippet read from strOldAdsFile for the one read from strNewAdsFile, and
// prints the path of each file that was changed.
//
// Returns the number of files for which replaceAds() succeeded; 0 when
// either snippet file cannot be read or the directory scan fails.
int myReplaceAdsByAddingHead(const string& strPath, const string& strOldAdsFile, const string& strNewAdsFile)
{
    string strOldAds, strNewAds;
    if (!getString(strOldAdsFile, strOldAds) || !getString(strNewAdsFile, strNewAds))
    {
        return 0;
    }

    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator it = files.begin(); it != files.end(); ++it)
    {
        if (replaceAds(*it, strOldAds, strNewAds))
        {
            cout << *it << endl;
            ++changed;
        }
    }
    return changed;
}


// Entry point: expects exactly two arguments, the root directory to process
// and the file holding the ad snippet.  Runs myAddAdsByAddingHead() on them
// and prints the number of files it changed.
//
// Exit status is -1 after printing the usage line when the argument count is
// wrong, 0 otherwise.
int main(int argc, char** argv)
{
    const int expectedArgc = 3;
    if (argc != expectedArgc)
    {
        cout << "usage: " << argv[0] << " <diabloPath> <googleads>" << endl;
        return -1;
    }

    const char* rootPath = argv[1];
    const char* adsFile = argv[2];
    cout << myAddAdsByAddingHead(rootPath, adsFile) << endl;

    // Alternative drivers, kept disabled:

//    if (argc != 4)
//    {
//        cout << "usage: " << argv[0] << " <diabloPath> <googleOldAds> <googleNewAds>" << endl;
//        return -1;
//    }
//    cout << myReplaceAdsByAddingHead(argv[1], argv[2], argv[3]) << endl;


//    if (argc != 2)
//    {
//        cout << "usage: " << argv[0] << " <diabloPath>" << endl;
//        return -1;
//    }
//    test5(argv[1]);
    return 0;
}