底下的代码是在搜索引擎中中截取几段字符串(关键字,连接等等......)放到
数据库中,但是在运行中,老出现
ent-type" c>
请高手帮帮忙,看看错误到底出在哪? 急~~~~~~~
小弟在这里谢谢了..
<[email=%@LANGUAGE=]%@LANGUAGE="VBSCRIPT[/email]" CODEPAGE="936"%>
<%
dim DBname,connstr,conn
DBname="Temp.mdb" '数据库路径,请用相对路径
c+
server.mappath(""&DBname&"")+";DefaultDir=;DRIVER={
Microsoft Access Driver (*.mdb)};"
set conn=server.createobject("ADODB.CONNECTION")
conn.open connstr
%>
<%
'option explicit
Dim start,keywords
start = Request.QueryString("start")
keywords = Request.QueryString("keywords")
'开始错误处理
On Error Resume Next
If Err.Number <> 0 Then
Response.Clear
'显示错误信息给用户
Response.Write "<p align='center' ><font size=3> 出错了,请重新打开Google搜索.</font></p>"
end if
%>
<HTML>
<HEAD>
<TITLE>
网络资源</TITLE>
</HEAD>
<STYLE type=text/css>
<!--
body,td{font-family:arial}
TD{FONT-SIZE:9pt;LINE-HEIGHT:18px}
.cred{color:#FF0000}
//-->
</STYLE>
<BODY leftmargin="0" topmargin="3" marginwidth="0" marginheight="0">
<table align="center" width="98%" cellspacing="0" cellpadding="0" border="0" bgcolor="#ffffff" >
<tr>
<form name="f1" method="post" action="searchi_gg2.asp">
<td width=150 height=50>
搜索引擎
</td>
<td align="left">
<input name=keywords size="40" maxlength="100" title="输入关键字,然后Let's Searching..." value="<%=keywords%>">
<input type="submit" value=" 二次搜索 ">
</td></form></tr>
</table>
<%
Dim strUrl,strTmp_gg,strInfo,strPage,strPageSum_gg,strQtime_gg
Dim bNoResult_gg,regEx,patrn,strPage1
Dim strArray_ggTemp,iNa,i,strArray_gg(30)
If keywords<>"" then
'Google查询字符串
strUrl ="
http://www.google.cn/search?hl=zh-CN&lr=lang_zh-CN&ie=gb2312&oe=UTF-8&sa=N&num=30&start="&start&"&q="&keywords
'开始采集
strtmp_gg = GetHTTPPage(strUrl)
' 开始轮询
服务器1,突破Google暂时屏蔽ip的问题
if strtmp_gg="0" then
strUrl="
http://203.208.33.101/search?hl=zh-CN&lr=lang_zh-CN&ie=gb2312&oe=UTF-8&sa=N&num=30&start="&start&"&q="&keywords
strtmp_gg = GetHTTPPage(strUrl)
' 开始轮询服务器2
if strtmp_gg="0" Then
strUrl="
http://66.249.89.99//search?hl=zh-CN&lr=lang_zh-CN&ie=gb2312&oe=UTF-8&sa=N&num=30&start="&start&"&q="&keywords
strtmp_gg = GetHTTPPage(strUrl)
end if
end if
If InStr(strtmp_gg,"找不到和您的查询")<>0 or (InStr(strtmp_gg,"Server Error")<>0) Or strtmp_gg="0" Then
bNoResult_gg=1
End If
'截取"搜索结果"部分的内容
strinfo = strCut(strtmp_gg,"<div>","</div>",2)
strArray_ggTemp = Split(strinfo,"<p class=g>")
iNa=Ubound(strArray_ggTemp)
'过滤掉Google快照和类似网页功能
for i = 1 to iNa
strArray_gg(i) = "<font size=3><a"&strCut(strArray_ggTemp(i),"<a","- </font>",2)& "</font>"
next
set strArray_ggTemp=nothing
'截取"分页区"部分的内容
strPage1 = strCut(strTmp_gg,"valign=bottom nowrap><font size=-1>","</table></div>",2)
strPage1 = Replace(strPage1,"href=/search?","href=searchi_gg1.asp?keywords="&keywords&"&")
'去除页码图片
patrn="<img.+?br>"
strPage=regReplace(patrn,strPage1,"")
patrn="<td.+?right>"
strPage=regReplace(patrn,strPage,"")
patrn="<td.+?nowrap>"
strPage=regReplace(patrn,strPage," ")
strPage=Replace(strPage,"</font>","")
'结果数量与用时
strPageSum_gg=strCut(strtmp_gg,"有 <b>","</b> 项符合",2)
strQtime_gg=strCut(strtmp_gg,"搜索用时 <b>","</b> 秒",2)
Set strTmp_gg=nothing
End If
%>
<!-- T1-Start -->
<table cellspacing=0 cellpadding=0 border=0 width=98% align="center">
<tr valign=center align=middle height=18>
<td width=1 bgcolor=#999999>
<td nowrap style="FONT-WEIGHT:bold;COLOR:#ffffff;BACKGROUND-COLOR:#0033cc" width=109>网络资源:</td>
<td width="865" align=right bgcolor=#eeeeee><nobr>找到符合<b><%=keywords%></b>的相关网页<b><%=strPageSum_gg%></b>篇,用时<b><%=strQtime_gg%></b>秒</nobr> </td>
</tr>
<tr><td bgcolor=#999999 colspan=3 height=2></td></tr></table>
</td>
</tr>
</table>
<%
if keywords="" then
Response.Write "<p align='center' ><font size=-1> 您好,请在搜索框中输入关键词.</font></p>"
elseif bNoResult_gg=1 then
Response.Write "<p align='center' ><font size=-1> 抱歉,未找到任何符合您查询条件的信息,请重新选择合适的关键词进行查询.</font></p>"
else
%>
<table width="98%" align="center" cellspacing="0" cellpadding="0" border="0">
<tr>
<td style=line-height:160% bgcolor="#ffffff" width="75%" valign=top><br>
<%
for i = 1 to iNa
dim tempstr,
sql,x,tempRex
x=0
x=100-i*0.5
tempstr=strArray_gg(i)
tempReg=""
Set regEx = New RegExp
'regEx.Pattern = "http.*?""" '表达式
regEx.Pattern = "<a.*?href=""(.*?)"".*?>(.*?)</a>"
regEx.IgnoreCase = True
regEx.Global = True
Set Matches = regEx.Execute(tempstr) 'str 为要匹配的字符串
For Each Match in Matches
tempRex = Match.Value
Next
set regEx=nothing
tempRegb=""
Set regEx = New RegExp
regEx.Pattern = "http.*?""" '表达式
'regEx.Pattern = "<a.*?href=""(.*?)"".*?>(.*?)</a>"
regEx.IgnoreCase = True
regEx.Global = True
Set Matches = regEx.Execute(tempstr) 'str 为要匹配的字符串
For Each Match in Matches
tempRexb = Match.Value
Next
sql="Insert into [serach](title,prop,keywords,[hand],UrlStr) values('"&FilterHtml(TempRex)&"','google','"&keywords&"',"&x&",'"&FilterHtml(tempRexb)&"')"
conn.execute(sql)
response.write strArray_gg(i)&"<br><br>"
'response.write FilterHtml(TempRex)&"<br><br>"
next
%>
</td>
<td width="25%" valign=top><br>
</td>
</tr>
</table>
<table width="98%" align="center" cellspacing="0" cellpadding="4" border="0">
<tr>
<td align="center">
<br><font size=3><%=strPage%></font>
</td>
</tr>
</table>
<%End If
Set strinfo=Nothing
Set strArray_gg(10)=nothing
%>
<hr size="1" width="760" color="#0000ff">
<div align="center"><font size=-1>bu毕业设计<span class="cred"></span></a></font></div>
</BODY>
</HTML>
<%
'采集函数
Function getHTTPPage(url)
On Error Resume Next
dim http
set http=Server.createobject("MSXML2.XMLHTTP")
Http.open "GET",url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=Http.responsetext
set http=nothing
If Err.number<>0 then
getHTTPPage="0"
'Response.Write "<div align='center'><b>服务器获取文件内容出错</b></div>"
Err.Clear
End If
End function
'截取字符串,1.包括前后字符串,2.不包括前后字符串
Function strCut(strContent,StartStr,EndStr,CutType)
Dim S1,S2
On Error Resume Next
Select Case CutType
Case 1
S1 = InStr(strContent,StartStr)
S2 = InStr(S1,strContent,EndStr)+Len(EndStr)
Case 2
S1 = InStr(strContent,StartStr)+Len(StartStr)
S2 = InStr(S1,strContent,EndStr)
End Select
If Err Then
strCute = "<p align='center' ><font size=-1>截取字符串出错.</font></p>"
Err.Clear
Exit Function
Else
strCut = Mid(strContent,S1,S2-S1)
End If
End Function
'正则替换函数
Function regReplace(patrn,strSource,strReplace)
Set regEx = New RegExp ' 建立正则表达式。
regEx.Pattern = patrn ' 设置模式。
regEx.IgnoreCase = true
regEx.Global = true
regReplace=regEx.replace(strSource,strReplace)
Set regEx=nothing
End Function
'替换字符串中的html脚本
Public Function FilterHtml(Str)
If Trim(Str) = "" Or IsNull(Str) Then
FilterHtml=""
Else
' Str = Replace(Str, ">", ">")
' Str = Replace(Str, "<", "<")
' Str = Replace(Str, Chr(32), " ")
' Str = Replace(Str ,Chr(9), " ")
Str = Replace(Str, Chr(34), "")
Str = Replace(Str, Chr(39), "")
' Str = Replace(Str, Chr(13), "")
' Str = Replace(Str, Chr(10) & Chr(10), "</p><p>")
' Str = Replace(Str, Chr(10), "<br> ")
FilterHtml = Str
End If
End Function
%>