Commit 15399c64, authored by Oleg.Korshul, committed by Alexander Trofimov

mht

git-svn-id: svn://fileserver/activex/AVS/Sources/TeamlabOffice/trunk/ServerComponents@65236 954022d7-b5bf-4e40-9824-e11837661b57
parent 7027f9fc
...@@ -541,6 +541,7 @@ namespace NSMht ...@@ -541,6 +541,7 @@ namespace NSMht
const std::string xmlFileType = "text/xml"; const std::string xmlFileType = "text/xml";
const std::string cssFileType = "text/css"; const std::string cssFileType = "text/css";
const std::string imageFileType = "image/"; const std::string imageFileType = "image/";
const std::string jsFileType = "application/x-javascript";
const std::string code_7bit = "7bit"; const std::string code_7bit = "7bit";
const std::string code_8bit = "8bit"; const std::string code_8bit = "8bit";
...@@ -548,6 +549,16 @@ namespace NSMht ...@@ -548,6 +549,16 @@ namespace NSMht
const std::string code_Base64 = "base64"; const std::string code_Base64 = "base64";
} }
// Replaces, in place, every non-overlapping occurrence of replaceFrom in text with replaceTo.
//
// text        - string that is modified.
// replaceFrom - substring to look for; when it is empty the call is a no-op.
// replaceTo   - text inserted instead of each occurrence (may be empty, which deletes them).
//
// The search resumes right after the inserted text, so a replacement that itself
// contains replaceFrom is not replaced again.
void string_replace(std::wstring& text, const std::wstring& replaceFrom, const std::wstring& replaceTo)
{
	// An empty pattern is found at every position: without this guard the loop below
	// would never terminate (and would keep growing text when replaceTo is not empty).
	if (replaceFrom.empty())
		return;

	size_t posn = 0;
	while (std::wstring::npos != (posn = text.find(replaceFrom, posn)))
	{
		text.replace(posn, replaceFrom.length(), replaceTo);
		// Continue after the freshly inserted text.
		posn += replaceTo.length();
	}
}
class CInnerFile class CInnerFile
{ {
public: public:
...@@ -562,15 +573,15 @@ namespace NSMht ...@@ -562,15 +573,15 @@ namespace NSMht
std::wstring m_sDstFilePath; std::wstring m_sDstFilePath;
public: public:
void Save(const std::map<std::wstring, std::wstring>& sMap) void Save(const std::map<std::wstring, std::wstring>& sMap, NSUnicodeConverter::CUnicodeConverter* pUnicodeConverter)
{ {
if (m_sContentType.find(Names::cssFileType) != std::wstring::npos || bool bIsCSS = (m_sContentType.find(Names::cssFileType) != std::string::npos) ? true : false;
m_sContentType.find(Names::htmlFileType) != std::wstring::npos || bool bIsHtml = (m_sContentType.find(Names::htmlFileType) != std::string::npos ||
m_sContentType.find(Names::xmlFileType) != std::wstring::npos) m_sContentType.find(Names::xmlFileType) != std::string::npos ||
{ m_sContentType.find(Names::jsFileType) != std::string::npos) ? true : false;
std::wstring sUnicodeData;
std::string sDstEncoding = m_sEncoding;
if (bIsCSS || bIsHtml)
{
if (m_sContentEncoding.find(Names::code_Base64) != std::string::npos) if (m_sContentEncoding.find(Names::code_Base64) != std::string::npos)
{ {
BYTE* pData = NULL; BYTE* pData = NULL;
...@@ -594,12 +605,85 @@ namespace NSMht ...@@ -594,12 +605,85 @@ namespace NSMht
sEnc = "latin1"; sEnc = "latin1";
} }
} }
NSUnicodeConverter::CUnicodeConverter oConverter;
std::wstring sRes = oConverter.toUnicode(m_sData, sEnc.c_str());
// дальше конвертим обратно в нужную кодировку, меняя пути std::wstring sRes = pUnicodeConverter->toUnicode(m_sData, sEnc.c_str());
// TODO:
NSFile::CFileBinary::SaveToFile(m_sDstFilePath, sRes, true); // дальше конвертим обратно в нужную кодировку, меня пути
for (std::map<std::wstring, std::wstring>::const_iterator i = sMap.begin(); i != sMap.end(); i++)
{
std::list<std::wstring> listReplace;
listReplace.push_back(i->first);
// корень
if (true)
{
std::wstring::size_type pos = m_sContentLocation.find(L"//");
std::wstring::size_type start = 0;
if (pos != std::wstring::npos)
start = pos + 3; // '///'
pos = m_sContentLocation.find('/', start);
if (pos != std::wstring::npos)
{
std::wstring sMain = m_sContentLocation.substr(0, pos);
if (0 == i->first.find(sMain))
{
listReplace.push_back(i->first.substr(sMain.length()));
}
}
}
// и относительная
if (true)
{
std::wstring::size_type pos = m_sContentLocation.rfind('/');
if (pos != std::wstring::npos)
{
std::wstring sUrl = m_sContentLocation.substr(0, pos + 1);
if (0 == i->first.find(sUrl))
{
std::wstring sCandidate = i->first.substr(sUrl.length());
listReplace.push_back(sCandidate);
listReplace.push_back(L"./" + sCandidate);
}
}
}
if (bIsHtml)
{
std::wstring sReplace = L"\"" + i->second + L"\"";
for (std::list<std::wstring>::iterator i = listReplace.begin(); i != listReplace.end(); i++)
{
std::wstring sFind1 = L"\"" + *i + L"\"";
std::wstring sFind2 = L"'" + *i + L"'";
string_replace(sRes, sFind1, sReplace);
string_replace(sRes, sFind2, sReplace);
}
}
else
{
std::wstring sReplace = L"url(" + i->second + L")";
for (std::list<std::wstring>::iterator i = listReplace.begin(); i != listReplace.end(); i++)
{
std::wstring sFind1 = L"url(" + *i + L")";
std::wstring sFind2 = L"url('" + *i + L"')";
std::wstring sFind3 = L"url(\"" + *i + L"\")";
string_replace(sRes, sFind1, sReplace);
string_replace(sRes, sFind2, sReplace);
string_replace(sRes, sFind3, sReplace);
}
}
}
std::string sResA = pUnicodeConverter->fromUnicode(sRes, sEnc.c_str());
NSFile::CFileBinary oDstFile;
if (oDstFile.CreateFileW(m_sDstFilePath))
{
oDstFile.WriteFile((BYTE*)sResA.c_str(), (DWORD)sResA.length());
}
oDstFile.CloseFile();
} }
else else
{ {
...@@ -630,12 +714,52 @@ namespace NSMht ...@@ -630,12 +714,52 @@ namespace NSMht
sEnc = "latin1"; sEnc = "latin1";
} }
} }
NSUnicodeConverter::CUnicodeConverter oConverter;
std::wstring sRes = oConverter.toUnicode(m_sData, sEnc.c_str()); std::wstring sRes = pUnicodeConverter->toUnicode(m_sData, sEnc.c_str());
NSFile::CFileBinary::SaveToFile(m_sDstFilePath, sRes, true); NSFile::CFileBinary::SaveToFile(m_sDstFilePath, sRes, true);
} }
} }
} }
// Guesses a concrete content type for a part declared as "application/octet-stream".
//
// The part becomes Names::htmlFileType when an "<HTML" or "<html" tag starts within the
// first 100 bytes of m_sData. Otherwise the type is derived from the extension found in
// m_sContentLocation (.js, .png, .jpg/.jpeg, .gif, .css). When nothing matches, or when
// the declared type is anything other than "application/octet-stream", m_sContentType is
// left untouched.
void CorrectType()
{
	if (m_sContentType != "application/octet-stream")
		return;

	// A tag that starts before offset 100 ends before offset 104, so searching this
	// prefix gives the same answer as searching the whole payload, without walking
	// through the complete data twice.
	const std::string sHead = m_sData.substr(0, 104);
	if (sHead.find("<HTML") != std::string::npos || sHead.find("<html") != std::string::npos)
	{
		m_sContentType = Names::htmlFileType;
		return;
	}

	// Two spellings of the location are tried, in order:
	//  [0] the location without its query string / fragment, so that a dot inside the
	//      query ("script.js?v=1.2") does not hide the real extension;
	//  [1] the full location, which keeps the previous lookup as a fallback
	//      ("get.php?file=logo.png").
	std::wstring arLocations[2];
	arLocations[0] = m_sContentLocation.substr(0, m_sContentLocation.find_first_of(L"?#"));
	arLocations[1] = m_sContentLocation;

	for (size_t nIndex = 0; nIndex < 2; ++nIndex)
	{
		const std::wstring& sLocation = arLocations[nIndex];

		std::wstring::size_type posExt = sLocation.rfind('.');
		if (posExt == std::wstring::npos)
			continue;

		// Everything from the last dot on, cut at a following '?' if there is one.
		std::wstring sExt = sLocation.substr(posExt);
		posExt = sExt.find('?');
		if (std::wstring::npos != posExt)
			sExt = sExt.substr(0, posExt);

		// ASCII-only lowercase, so that ".JS" or ".PNG" are recognised as well.
		for (std::wstring::size_type nChar = 0; nChar < sExt.length(); ++nChar)
		{
			if (sExt[nChar] >= L'A' && sExt[nChar] <= L'Z')
				sExt[nChar] = static_cast<wchar_t>(sExt[nChar] - L'A' + L'a');
		}

		if (sExt == L".js")
		{
			m_sContentType = Names::jsFileType;
			return;
		}
		if (sExt == L".png")
		{
			m_sContentType = "image/png";
			return;
		}
		if (sExt == L".jpg" || sExt == L".jpeg")
		{
			m_sContentType = "image/jpg";
			return;
		}
		if (sExt == L".gif")
		{
			m_sContentType = "image/gif";
			return;
		}
		if (sExt == L".css")
		{
			m_sContentType = Names::cssFileType;
			return;
		}
	}
}
}; };
class CMhtFile class CMhtFile
...@@ -657,7 +781,7 @@ namespace NSMht ...@@ -657,7 +781,7 @@ namespace NSMht
{ {
m_sFolder = NSFile::CFileBinary::CreateTempFileWithUniqueName(NSFile::CFileBinary::GetTempPath(), L"MHT"); m_sFolder = NSFile::CFileBinary::CreateTempFileWithUniqueName(NSFile::CFileBinary::GetTempPath(), L"MHT");
#if 1 #if 0
m_sFolder = L"D:\\test\\Document\\MHT"; m_sFolder = L"D:\\test\\Document\\MHT";
#endif #endif
...@@ -756,26 +880,32 @@ namespace NSMht ...@@ -756,26 +880,32 @@ namespace NSMht
{ {
nNumber++; nNumber++;
CInnerFile* pFile = i.operator ->(); CInnerFile* pFile = i.operator ->();
std::wstring sFileExt = L".bin"; std::wstring sFileExt = L".png"; // L".bin" - обычно это картинки. Так и будем сохранять
if (pFile->m_sContentType.find(Names::cssFileType) != std::wstring::npos) if (pFile->m_sContentType.find(Names::cssFileType) != std::string::npos)
{ {
sFileExt = L".css"; sFileExt = L".css";
} }
else if (pFile->m_sContentType.find(Names::imageFileType) != std::wstring::npos) else if (pFile->m_sContentType.find(Names::imageFileType) != std::string::npos)
{ {
if (pFile->m_sContentType.find("png") != std::wstring::npos) if (pFile->m_sContentType.find("png") != std::string::npos)
sFileExt = L".png"; sFileExt = L".png";
else if (pFile->m_sContentType.find("gif") != std::string::npos)
sFileExt = L".gif";
else else
sFileExt = L".jpg"; sFileExt = L".jpg";
} }
else if (pFile->m_sContentType.find("xml") != std::wstring::npos) else if (pFile->m_sContentType.find("xml") != std::string::npos)
{ {
sFileExt = L".xml"; sFileExt = L".xml";
} }
else if (pFile->m_sContentType.find("html") != std::wstring::npos) else if (pFile->m_sContentType.find("html") != std::string::npos)
{ {
sFileExt = L".html"; sFileExt = L".html";
} }
else if (pFile->m_sContentType.find("javascript") != std::string::npos)
{
sFileExt = L".js";
}
std::wstring sUrl = L"/" + std::to_wstring(nNumber) + sFileExt; std::wstring sUrl = L"/" + std::to_wstring(nNumber) + sFileExt;
pFile->m_sDstFilePath = m_sFolder + sUrl; pFile->m_sDstFilePath = m_sFolder + sUrl;
m_sUrlMap.insert(std::pair<std::wstring, std::wstring>(pFile->m_sContentLocation, L"." + sUrl)); m_sUrlMap.insert(std::pair<std::wstring, std::wstring>(pFile->m_sContentLocation, L"." + sUrl));
...@@ -783,11 +913,11 @@ namespace NSMht ...@@ -783,11 +913,11 @@ namespace NSMht
for (std::list<CInnerFile>::iterator i = m_arFiles.begin(); i != m_arFiles.end(); i++) for (std::list<CInnerFile>::iterator i = m_arFiles.begin(); i != m_arFiles.end(); i++)
{ {
i->Save(m_sUrlMap); i->Save(m_sUrlMap, &m_oUnicodeConverter);
} }
m_oFile.m_sDstFilePath = m_sFolder + L"/index.html"; m_oFile.m_sDstFilePath = m_sFolder + L"/index.html";
m_oFile.Save(m_sUrlMap); m_oFile.Save(m_sUrlMap, &m_oUnicodeConverter);
} }
inline std::string GetLower(const std::string& sSrc) inline std::string GetLower(const std::string& sSrc)
...@@ -933,7 +1063,18 @@ namespace NSMht ...@@ -933,7 +1063,18 @@ namespace NSMht
boundary = "--" + boundary; boundary = "--" + boundary;
} }
//тип файла (image/, text/html, text/css) //тип файла (image/, text/html, text/css)
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType)) {} else if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType))
{
if (oInnerFile.m_sContentType.find(Names::htmlFileType) != std::string::npos)
{
if (sLowerLine.find(".gif") != std::string::npos)
oInnerFile.m_sContentType = "image/gif";
else if (sLowerLine.find(".png") != std::string::npos)
oInnerFile.m_sContentType = "image/png";
else if (sLowerLine.find(".jpg") != std::string::npos || sLowerLine.find(".jpeg") != std::string::npos)
oInnerFile.m_sContentType = "image/jpg";
}
}
//наименование файла //наименование файла
else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {} else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {}
else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {} else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {}
...@@ -960,6 +1101,7 @@ namespace NSMht ...@@ -960,6 +1101,7 @@ namespace NSMht
} }
} }
oInnerFile.m_sData = oBuilderA.GetData(); oInnerFile.m_sData = oBuilderA.GetData();
oInnerFile.CorrectType();
if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos) if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos)
{ {
...@@ -990,7 +1132,18 @@ namespace NSMht ...@@ -990,7 +1132,18 @@ namespace NSMht
{ {
std::string sLowerLine = GetLower(*i); std::string sLowerLine = GetLower(*i);
if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType)) {} if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType))
{
if (oInnerFile.m_sContentType.find(Names::htmlFileType) != std::string::npos)
{
if (sLowerLine.find(".gif") != std::string::npos)
oInnerFile.m_sContentType = "image/gif";
else if (sLowerLine.find(".png") != std::string::npos)
oInnerFile.m_sContentType = "image/png";
else if (sLowerLine.find(".jpg") != std::string::npos || sLowerLine.find(".jpeg") != std::string::npos)
oInnerFile.m_sContentType = "image/jpg";
}
}
//наименование файла //наименование файла
else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {} else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {}
else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {} else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {}
...@@ -1023,6 +1176,7 @@ namespace NSMht ...@@ -1023,6 +1176,7 @@ namespace NSMht
} }
} }
oInnerFile.m_sData = oBuilderA.GetData(); oInnerFile.m_sData = oBuilderA.GetData();
oInnerFile.CorrectType();
if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos) if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos)
{ {
......
...@@ -25,6 +25,7 @@ public: ...@@ -25,6 +25,7 @@ public:
int Convert(const std::vector<std::wstring>& arFiles, const std::wstring& sDstfolder, const std::wstring& sPathInternal = L""); int Convert(const std::vector<std::wstring>& arFiles, const std::wstring& sDstfolder, const std::wstring& sPathInternal = L"");
int ConvertEpub(const std::wstring& sFolder, std::wstring& sMetaInfo, const std::wstring& sDstfolder, const std::wstring& sPathInternal = L""); int ConvertEpub(const std::wstring& sFolder, std::wstring& sMetaInfo, const std::wstring& sDstfolder, const std::wstring& sPathInternal = L"");
int ConvertMht(const std::wstring& sFile, const std::wstring& sDstfolder, const std::wstring& sPathInternal = L"");
}; };
#endif // _HTMLFILE_HTMLFILE_H_ #endif // _HTMLFILE_HTMLFILE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment