#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <cstdlib>
#include <vector>
#include <htmlcxx/html/ParserDom.h>
#include <sys/dir.h>
using namespace std;
using namespace htmlcxx::HTML;
// Reads the entire file named by strFile into strContent.
//
// Returns true when the stream opened in a good state; strContent is then
// overwritten with everything the stream buffer yielded. Returns false, with
// strContent left untouched, when the stream is not good after opening.
bool getString(const string& strFile, string& strContent)
{
    ifstream input(strFile.c_str(), ifstream::in);
    if (!input.good())
    {
        return false;
    }

    // Drain the file's stream buffer in a single insertion.
    stringstream buffer;
    buffer << input.rdbuf();
    strContent = buffer.str();
    return true;
}
// Replaces the contents of the file named by strFile with strContent.
//
// The file is opened for output with truncation, so any previous content is
// discarded as soon as the open succeeds.
//
// Returns true only when the file opened AND the data was flushed without a
// stream error. Returns false when the file cannot be opened or when the
// write/flush fails, so callers never count a failed rewrite as a success.
bool setString(const string& strFile, const string& strContent)
{
    ofstream ofs(strFile.c_str(), ofstream::out | ofstream::trunc);
    if (!ofs.good())
    {
        return false;
    }

    ofs << strContent;
    // Output is buffered: force it out now so that a write error shows up in
    // the stream state before we report the result.
    ofs.flush();
    return ofs.good();
}
// Case-insensitive search for strTagName followed by a single space inside
// strTag.
//
// The comparison is attempted at every position of strTag that does not hold
// a space, tab or newline, so the name may be preceded by other characters.
// The trailing space is mandatory: a strTag that ends right after the name
// does not match.
//
// Returns true when such an occurrence exists, false otherwise.
bool myTagCompare(const string& strTag, const string& strTagName)
{
    const string needle = strTagName + " ";
    const char* const haystack = strTag.c_str();

    for (size_t pos = 0; pos < strTag.size(); ++pos)
    {
        const char ch = haystack[pos];
        if (ch == ' ' || ch == '\t' || ch == '\n')
        {
            continue;
        }
        if (strncasecmp(haystack + pos, needle.c_str(), needle.size()) == 0)
        {
            return true;
        }
    }
    return false;
}
// Finds the insertion point just behind the opening tag named pTag.
//
// strContent - HTML text to parse.
// start      - out: index of the character that follows the first '>' found
//              at or after the matching node's offset. Written only when the
//              function returns true.
// pTag       - tag name to look for, compared case-insensitively
//              (defaults to "head").
//
// Returns true when a tag node with that name and a following '>' was found,
// false otherwise.
bool findHeadTag(const string& strContent, size_t& start, const char* pTag="head");
bool findHeadTag(const string& strContent, size_t& start, const char* pTag)
{
    ParserDom dom;
    const tree<Node>& tr = dom.parseTree(strContent);
    for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); ++it)
    {
        // Accept only tag nodes whose name equals pTag. Comparing the name
        // directly avoids accepting the first tag that merely differs from
        // pTag.
        if (!it->isTag() || strcasecmp(it->tagName().c_str(), pTag) != 0)
        {
            continue;
        }

        // The insertion point is right behind the '>' closing the open tag.
        const size_t pos = strContent.find('>', it->offset());
        if (pos != string::npos)
        {
            start = pos + 1;
            return true;
        }
    }
    return false;
}
// Inserts strAds at the position findHeadTag reports for strFile.
//
// Returns true only when the file was read, an insertion point was found, the
// text at that point was not already strAds, and the modified content was
// written back. Every other outcome returns false without writing the file.
bool addAds(const string& strFile, const string& strAds)
{
    string html;
    if (!getString(strFile, html))
    {
        return false;
    }

    size_t insertAt = 0;
    if (!findHeadTag(html, insertAt))
    {
        return false;
    }

    // The snippet is already in place: nothing to add.
    if (html.compare(insertAt, strAds.size(), strAds) == 0)
    {
        return false;
    }

    html.insert(insertAt, strAds);
    return setString(strFile, html);
}
// Removes strAds from strFile when it sits exactly at the position
// findHeadTag reports.
//
// Returns true only when the file was read, an insertion point was found, the
// text at that point equals strAds, and the shortened content was written
// back. Every other outcome returns false without writing the file.
bool removeAds(const string& strFile, const string& strAds)
{
    string html;
    if (!getString(strFile, html))
    {
        return false;
    }

    size_t adsAt = 0;
    if (!findHeadTag(html, adsAt))
    {
        return false;
    }

    // Only strip the snippet when it is found verbatim at that position.
    if (html.compare(adsAt, strAds.size(), strAds) != 0)
    {
        return false;
    }

    html.erase(adsAt, strAds.size());
    return setString(strFile, html);
}
// Swaps strOldAds for strNewAds in strFile when strOldAds sits exactly at the
// position findHeadTag reports.
//
// Returns true only when the file was read, an insertion point was found, the
// text at that point equals strOldAds, and the updated content was written
// back. Every other outcome returns false without writing the file.
bool replaceAds(const string& strFile, const string& strOldAds, const string& strNewAds)
{
    string html;
    if (!getString(strFile, html))
    {
        return false;
    }

    size_t adsAt = 0;
    if (!findHeadTag(html, adsAt))
    {
        return false;
    }

    if (html.compare(adsAt, strOldAds.size(), strOldAds) != 0)
    {
        return false;
    }

    // Substitute the old snippet with the new one in a single step.
    html.replace(adsAt, strOldAds.size(), strNewAds);
    return setString(strFile, html);
}
// Tells whether strName ends in an HTML extension.
//
// The extension is everything after the last '.' in strName and is compared
// case-insensitively against "html" and "htm". A name without any '.' is
// never considered HTML.
bool isHtml(const string& strName)
{
    const size_t dot = strName.rfind('.');
    if (dot == string::npos)
    {
        return false;
    }

    const string extension(strName, dot + 1);
    return strcasecmp(extension.c_str(), "html") == 0
        || strcasecmp(extension.c_str(), "htm") == 0;
}
// Tells whether a directory name is excluded from the recursive walks.
// The match is exact and case-sensitive against "personal" and
// "generatedTrees".
bool isForbidden(const string& strDir)
{
    return strDir == "personal" || strDir == "generatedTrees";
}
// Recursively appends to vect the path of every HTML file below strPath.
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden. Regular files are kept when isHtml accepts their name.
// Entries whose d_type is neither DT_DIR nor DT_REG are ignored.
//
// Returns false when strPath (or any directory below it) cannot be opened;
// the failure is reported with perror. vect may already hold the paths found
// before the failure. The directory handle is closed on every path.
bool doGetAllFiles(const string& strPath, vector<string>& vect)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror(strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    // Stop reading as soon as a nested walk fails, but still fall through to
    // closedir() so the handle is not leaked.
    while (ok && (ent = readdir(dir)) != NULL)
    {
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                ok = doGetAllFiles(strPath + "/" + ent->d_name, vect);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                vect.push_back(strPath + "/" + ent->d_name);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}
// Directory-walk callback: prints strFileName on stdout when the file could
// be read but findHeadTag found no insertion point in it.
//
// Always returns true, so a walk using this callback never stops early
// because of it.
bool doFindHeadTag(const string& strFileName)
{
    string html;
    size_t ignoredPos = 0;
    if (getString(strFileName, html) && !findHeadTag(html, ignoredPos))
    {
        cout << strFileName << endl;
    }
    return true;
}
// Inserts strAds into strFileName, creating a head element when none exists.
//
// - When findHeadTag locates "head", strAds is inserted there unless the text
//   at that position already equals strAds (then nothing is written).
// - Otherwise, when findHeadTag locates "html", "<head>" + strAds + "</head>"
//   is inserted at that position.
// - When neither tag is found, or the file cannot be read, nothing is written.
//
// Returns the result of writing the file back, or false in the cases where
// nothing is written.
bool doAddHeadTag(const string& strFileName, const string& strAds)
{
    string html;
    if (!getString(strFileName, html))
    {
        return false;
    }

    size_t insertAt = 0;
    string payload = strAds;
    if (findHeadTag(html, insertAt))
    {
        // Head exists: skip the file when the snippet is already there.
        if (html.compare(insertAt, strAds.size(), strAds) == 0)
        {
            return false;
        }
    }
    else if (findHeadTag(html, insertAt, "html"))
    {
        // No head: wrap the snippet in a new head element after <html>.
        payload = "<head>";
        payload += strAds;
        payload += "</head>";
    }
    else
    {
        return false;
    }

    html.insert(insertAt, payload);
    return setString(strFileName, html);
}
// Signature of the per-file callback used by the callback-driven walk:
// receives the file path and returns false to abort the walk.
typedef bool(*CallbackType)(const string&);

// Recursively invokes cb on every HTML file below strPath.
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden. cb is called for regular files whose name isHtml accepts.
// Entries whose d_type is neither DT_DIR nor DT_REG are ignored.
//
// Returns false when a directory cannot be opened (reported with perror),
// when a nested walk fails, or when cb returns false; the walk stops at that
// point. The directory handle is closed on every path.
bool doGetAllFiles(const string& strPath, CallbackType cb)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror(strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    // Leave the loop on the first failure, but always reach closedir() so the
    // handle is not leaked.
    while (ok && (ent = readdir(dir)) != NULL)
    {
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                ok = doGetAllFiles(strPath + "/" + ent->d_name, cb);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                ok = cb(strPath + "/" + ent->d_name);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}
// Resolves strPath with realpath and collects every HTML file below the
// resolved directory into vect.
//
// Returns false when realpath fails; otherwise returns whatever the recursive
// walk returns.
bool getAllFiles(const string& strPath, vector<string>& vect)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doGetAllFiles(resolved, vect);
}
bool doSearchAds(const string& strFile)
{
string strContent;
if (getString(strFile, strContent))
{
ParserDom dom;
const tree<Node>& tr = dom.parseTree(strContent);
for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); it++)
{
if (it->isTag() && it->tagName().compare("script") == 0)
{
if (it->text().find("google")!= string::npos)
{
cout << "filename:"<< strFile << " text:" << it->text() << endl;
}
}
}
}
return false;
}
// Recursively runs doSearchAds on every HTML file below strPath.
//
// Sub-directories are descended into unless they are "." / ".." or rejected
// by isForbidden. Regular files are searched when isHtml accepts their name;
// the result of doSearchAds is ignored. Entries whose d_type is neither
// DT_DIR nor DT_REG are skipped.
//
// Returns false when strPath (or a directory below it) cannot be opened; the
// failure is reported with perror and the walk stops. The directory handle is
// closed on every path.
bool doSearchAllFiles(const string& strPath)
{
    DIR* dir = opendir(strPath.c_str());
    if (dir == NULL)
    {
        perror(strPath.c_str());
        return false;
    }

    bool ok = true;
    struct dirent* ent;
    // Stop on the first failed nested walk, but always reach closedir() so
    // the handle is not leaked.
    while (ok && (ent = readdir(dir)) != NULL)
    {
        const string strNewPath = strPath + "/" + ent->d_name;
        switch (ent->d_type)
        {
        case DT_DIR:
            if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0
                && !isForbidden(ent->d_name))
            {
                ok = doSearchAllFiles(strNewPath);
            }
            break;
        case DT_REG:
            if (isHtml(ent->d_name))
            {
                doSearchAds(strNewPath);
            }
            break;
        default:
            break;
        }
    }
    closedir(dir);
    return ok;
}
// Resolves strPath with realpath and searches every HTML file below the
// resolved directory via doSearchAllFiles.
//
// Returns false when realpath fails; otherwise returns whatever the recursive
// search returns.
bool searchAds(const string& strPath)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doSearchAllFiles(resolved);
}
// Manual check: lists every HTML file found below strPath, one per line,
// followed by the total count.
//
// Returns false (printing nothing) when the file collection fails.
bool test2(const string& strPath)
{
    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return false;
    }

    for (vector<string>::const_iterator file = files.begin(); file != files.end(); ++file)
    {
        cout << *file << endl;
    }
    cout << "total file number: " << files.size() << endl;
    return true;
}
// Manual round-trip check on strFileName: adds the snippet read from a fixed
// ads file, removes it again, and verifies the file content is the same as
// before.
//
// Returns true (after printing a confirmation) only when every step succeeds
// and the content matches; any failing step returns false immediately.
bool test1(const string& strFileName)
{
    const string adsPath="/home/nick/googleads.txt";

    string ads;
    if (!getString(adsPath, ads))
    {
        return false;
    }

    string before;
    if (!getString(strFileName, before))
    {
        return false;
    }

    if (!addAds(strFileName, ads) || !removeAds(strFileName, ads))
    {
        return false;
    }

    string after;
    if (!getString(strFileName, after))
    {
        return false;
    }

    if (before != after)
    {
        return false;
    }
    cout << "content remain unchanged" << endl;
    return true;
}
// Manual round-trip check on strFileName: reads the snippet from a fixed ads
// file, adds it, removes it again, and compares the file content with what it
// was at the start.
//
// The steps run in order and stop at the first failure. Returns true, after
// printing a confirmation, only when all of them succeed and the content is
// unchanged.
bool test3(const string& strFileName)
{
    const string adsPath="/home/nick/googleads.txt";
    string ads;
    string before;
    string after;

    // && short-circuits, so each step runs only if the previous one succeeded.
    const bool roundTripped =
        getString(adsPath, ads)
        && getString(strFileName, before)
        && addAds(strFileName, ads)
        && removeAds(strFileName, ads)
        && getString(strFileName, after)
        && before.compare(after) == 0;

    if (roundTripped)
    {
        cout << "content remain unchanged" << endl;
    }
    return roundTripped;
}
// Manual check: resolves strPath with realpath and walks it with
// doFindHeadTag as the per-file callback.
//
// Returns false when realpath fails; otherwise returns the walk's result.
bool test4(const string& strPath)
{
    char resolved[PATH_MAX+1];
    if (realpath(strPath.c_str(), resolved) == NULL)
    {
        return false;
    }
    return doGetAllFiles(resolved, doFindHeadTag);
}
// Prints every tag node of strFile whose name equals pTag (case-insensitive).
// For each match, the slice of the file content given by the node's offset
// and length is written to stdout on its own line.
//
// Returns true when at least one matching tag was printed; false when the
// file could not be read or contained no such tag. This lets callers tell
// files with matches apart from files without.
bool findAllTags(const string& strFile, const char* pTag)
{
    string strContent;
    if (!getString(strFile, strContent))
    {
        return false;
    }

    bool found = false;
    ParserDom dom;
    const tree<Node>& tr = dom.parseTree(strContent);
    for (tree<Node>::pre_order_iterator it = tr.begin(); it != tr.end(); ++it)
    {
        if (it->isTag() && strcasecmp(it->tagName().c_str(), pTag) == 0)
        {
            cout << strContent.substr(it->offset(), it->length()) << endl;
            found = true;
        }
    }
    return found;
}
// Manual check: runs findAllTags for "a" on every HTML file below strPath,
// printing a file's path whenever findAllTags returns true for it, and ends
// with the total number of files visited.
//
// Returns false (printing nothing) when the file collection fails.
bool test5(const string& strPath)
{
    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return false;
    }

    for (vector<string>::const_iterator file = files.begin(); file != files.end(); ++file)
    {
        if (findAllTags(*file, "a"))
        {
            cout << *file << endl;
        }
    }
    cout << "total file number: " << files.size() << endl;
    return true;
}
// Runs addAds with the snippet stored in strAdsFile on every HTML file below
// strPath, printing the path of each file for which addAds returned true.
//
// Returns the number of such files; 0 when the snippet cannot be read or the
// file collection fails.
int myAddAds(const string& strPath, const string& strAdsFile)
{
    string ads;
    if (!getString(strAdsFile, ads))
    {
        return 0;
    }

    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator file = files.begin(); file != files.end(); ++file)
    {
        if (addAds(*file, ads))
        {
            cout << *file << endl;
            ++changed;
        }
    }
    return changed;
}
// Runs doAddHeadTag with the snippet stored in strAdsFile on every HTML file
// below strPath, printing the path of each file for which it returned true.
//
// Returns the number of such files; 0 when the snippet cannot be read or the
// file collection fails.
int myAddAdsByAddingHead(const string& strPath, const string& strAdsFile)
{
    string ads;
    if (!getString(strAdsFile, ads))
    {
        return 0;
    }

    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator file = files.begin(); file != files.end(); ++file)
    {
        if (doAddHeadTag(*file, ads))
        {
            cout << *file << endl;
            ++changed;
        }
    }
    return changed;
}
// Runs replaceAds on every HTML file below strPath, swapping the snippet
// stored in strOldAdsFile for the one stored in strNewAdsFile, and prints the
// path of each file for which replaceAds returned true.
//
// Returns the number of such files; 0 when either snippet cannot be read or
// the file collection fails.
int myReplaceAdsByAddingHead(const string& strPath, const string& strOldAdsFile, const string& strNewAdsFile)
{
    string oldAds;
    string newAds;
    // The new snippet is read only when the old one was read successfully.
    if (!getString(strOldAdsFile, oldAds) || !getString(strNewAdsFile, newAds))
    {
        return 0;
    }

    vector<string> files;
    if (!getAllFiles(strPath, files))
    {
        return 0;
    }

    int changed = 0;
    for (vector<string>::const_iterator file = files.begin(); file != files.end(); ++file)
    {
        if (replaceAds(*file, oldAds, newAds))
        {
            cout << *file << endl;
            ++changed;
        }
    }
    return changed;
}
// Entry point. Expects exactly two arguments: the directory to process and
// the file holding the snippet to insert. Prints the count returned by
// myAddAdsByAddingHead and exits with 0; with any other argument count it
// prints a usage line and exits with -1.
int main(int argc, char** argv)
{
    if (argc == 3)
    {
        cout << myAddAdsByAddingHead(argv[1], argv[2]) << endl;
        return 0;
    }

    cout << "usage: " << argv[0] << " <diabloPath> <googleads>" << endl;
    return -1;
}