VC 获取网页数据时,如何解决乱码问题

    xiaoxiao2025-07-09  10

    网页编码格式有:UTF-8,GBK,gb2312等,而VC是Unicode编码格式,获取网页数据直接显示会有乱码问题,那么就需要将其他编码格式转换成Unicode格式。 //取网页标题 void CGetWebTitleDlg::OnBnClickedBtnGetTitle() { m_HtmlCode.SetWindowText(_T(""));//clear CInternetSession mySession(NULL,0); CHttpFile* htmlFile=NULL; CString strLine,url,strHtml; TCHAR sRecv[1024]; UINT CodePage=65001;//CP_UTF8:65001 CP_ACP:0 m_Url.GetWindowText(url); TRY { htmlFile=(CHttpFile*)mySession.OpenURL(url);//打开连接 //获取网页编码 while(htmlFile->ReadString(sRecv,1024)) { //先用UTF8来进行转换,如果html页面编码是gbk或gb2312,转换后中文字符为 //乱码,但英文字符显示正常,我们判断html页码编码,通过寻找英文就可以了 int nBufferSize = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1, NULL, 0); wchar_t *pBuffer = new wchar_t[nBufferSize+1]; memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); strHtml=pBuffer; if (-1!=strHtml.Find(_T("charset=gbk"))) { CodePage=0; delete pBuffer; break; } if (-1!=strHtml.Find(_T("charset=GBK")))//http://www.sohu.com { CodePage=0; delete pBuffer; break; } if (-1!=strHtml.Find(_T("charset=gb2312"))) { CodePage=0; delete pBuffer; break; } if (-1!=strHtml.Find(_T("charset=GB2312"))) { CodePage=0; delete pBuffer; break; } if (-1!=strHtml.Find(_T("charset=utf-8"))) { CodePage=65001; delete pBuffer; break; } if (-1!=strHtml.Find(_T("charset=UTF-8"))) { CodePage=65001; delete pBuffer; break; } delete pBuffer; } strHtml=_T(""); //获取网页源码 <span style="color:#ff9966;"> htmlFile=(CHttpFile*)mySession.OpenURL(url);//重新打开连接 while(htmlFile->ReadString(sRecv,1024)) { // 编码转换,可解决中文乱码问题 //gb2312转为unicode,则用CP_ACP //gbk转为unicode,也用CP_ACP //utf-8转为unicode,则用CP_UTF8 int nBufferSize = MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1, NULL, 0); wchar_t *pBuffer = new wchar_t[nBufferSize]; memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); //gb2312转为unicode,则用CP_ACP //gbk转为unicode,也用CP_ACP //utf-8转为unicode,则用CP_UTF8 MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); strHtml+=pBuffer; strHtml+="\r\n"; delete pBuffer; } </span> htmlFile->Close(); mySession.Close() ; delete htmlFile; m_HtmlCode.SetWindowText(strHtml);//显示网页源码 //获取网页标题 CString szTitle=strHtml.GetString(); int nStart=szTitle.Find(_T("<title>")); int nEnd=szTitle.Find(_T("</title>")); szTitle=szTitle.Mid(nStart+7,nEnd-nStart-7); this->SetWindowText(_T("获取到的网页标题为【")+szTitle+_T("】 By︶风不冷丶")); } CATCH (CException, e) { TCHAR err[1024]; e->GetErrorMessage(err,1024); m_HtmlCode.SetWindowText(err); } END_CATCH }
    转载请注明原文地址: https://ju.6miu.com/read-1300510.html
    最新回复(0)