I don't want to use TWebBrowser. Does anyone know another method for getting the title of a website?
You can get the content of the webpage using the InternetOpenUrl and InternetReadFile functions, and then search for the <title> tag.
Check this sample app:
program GetTitleHTML;
{$APPTYPE CONSOLE}
uses
WinInet,
StrUtils,
SysUtils;
{ Downloads the page at Url through WinInet and returns the text between the
  opening <title ...> tag and the closing </title> tag.

  Url    - address of the page to fetch.
  Result - the title text exactly as it appears in the page (not trimmed or
           entity-decoded), or '' when the page cannot be opened, the read
           fails before a complete title element is seen, or the page has no
           complete title element.

  The tag search is case-insensitive and accepts attributes on the opening
  tag (e.g. <title lang="en">).

  NOTE(review): the downloaded bytes are converted with the default
  AnsiString -> string conversion; pages served in another encoding (such as
  UTF-8) may yield mangled non-ASCII characters - confirm whether that matters. }
function GetHTMLTitle(const Url:string):string;
const
  BuffSize      = 64*1024;      // size of one InternetReadFile request
  MaxPageBytes  = 1024*1024;    // stop downloading after this much data
  TitleTagBegin = '<title';     // no '>' so that attributes are allowed
  TitleTagEnd   = '</title>';
var
  hInter    : HINTERNET;
  UrlHandle : HINTERNET;
  BytesRead : Cardinal;
  Buffer    : Pointer;
  Chunk     : AnsiString;
  Page      : AnsiString;
  Html      : string;
  Lower     : string;
  i, c, f   : Integer;
begin
  Result := '';
  hInter := InternetOpen('', INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
  if not Assigned(hInter) then
    Exit;
  try
    UrlHandle := InternetOpenUrl(hInter, PChar(Url), nil, 0, INTERNET_FLAG_RELOAD, 0);
    if not Assigned(UrlHandle) then
      Exit;
    try
      GetMem(Buffer, BuffSize);
      try
        // A single InternetReadFile call may deliver only part of the page,
        // so keep reading until the closing tag has arrived, the stream
        // ends, a read fails, or the size cap is reached.
        Page := '';
        repeat
          BytesRead := 0;
          if (not InternetReadFile(UrlHandle, Buffer, BuffSize, BytesRead)) or
             (BytesRead = 0) then
            Break;
          SetString(Chunk, PAnsiChar(Buffer), BytesRead);
          Page := Page + Chunk;
        until (Length(Page) >= MaxPageBytes) or
              (Pos(TitleTagEnd, LowerCase(string(Page))) > 0);
      finally
        FreeMem(Buffer);
      end;

      // Search a lower-cased copy so <TITLE> matches too. LowerCase only
      // changes ASCII letters, so positions in Lower are valid in Html.
      Html  := string(Page);
      Lower := LowerCase(Html);

      i := Pos(TitleTagBegin, Lower);
      if i = 0 then
        Exit;
      // End of the opening tag, skipping any attributes.
      c := PosEx('>', Lower, i + Length(TitleTagBegin));
      if c = 0 then
        Exit;
      f := PosEx(TitleTagEnd, Lower, c + 1);
      if f = 0 then
        Exit;

      Result := Copy(Html, c + 1, f - c - 1);
    finally
      InternetCloseHandle(UrlHandle);
    end;
  finally
    // Runs on every path, including the early Exits and exceptions.
    InternetCloseHandle(hInter);
  end;
end;
const
  // Pages whose titles are printed, in this order.
  SampleUrls: array[0..2] of string = (
    'http://stackoverflow.com/questions/4966888/how-to-get-website-title-from-delphi',
    'http://www.google.com/',
    'http://stackoverflow.com/questions/tagged/delphi');
var
  UrlIndex: Integer;
begin
  // Print the title of each sample page, then wait for Enter so the console
  // window stays open. Any exception is reported as "ClassName: Message".
  try
    for UrlIndex := Low(SampleUrls) to High(SampleUrls) do
      Writeln(GetHTMLTitle(SampleUrls[UrlIndex]));
    Readln;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
It totally depends on how the website sets the title.
The <title> tag is not the only way; the title can also be set with JavaScript, etc.
The best approach is to load the page in a web browser control (for instance, TWebBrowser), then grab the title from there.
This page has some leads on that.
--jeroen
Continuing with birger's idea and keeping the code similar to RRUZ's, using Indy (the TIdHTTP component) the same routine can look similar to this:
{ Downloads the page at Url with Indy's TIdHTTP and returns the text between
  the opening <title ...> tag and the closing </title> tag.

  Url    - address of the page to fetch; surrounding whitespace is trimmed
           before the request is made.
  Result - the title text exactly as it appears in the page, or '' when the
           page has no complete title element.

  The tag search is case-insensitive and accepts attributes on the opening
  tag. There is no exception handler here, so anything raised by
  TIdHTTP.Get propagates to the caller. }
function GetHTMLTitle(const Url:string):string;
const
  TitleTagBegin = '<title';     // no '>' so that attributes are allowed
  TitleTagEnd   = '</title>';
var
  idH     : TidHTTP;
  Html    : string;
  Lower   : string;
  i, c, f : Integer;
begin
  // Start empty so a page without a title yields '' and not the page body.
  Result := '';
  idH := TidHTTP.Create();
  try
    Html := idH.Get(Trim(Url));

    // Search a lower-cased copy so <TITLE> matches too. LowerCase only
    // changes ASCII letters, so positions in Lower are valid in Html.
    Lower := LowerCase(Html);

    i := Pos(TitleTagBegin, Lower);
    if i = 0 then
      Exit;
    // End of the opening tag, skipping any attributes.
    c := PosEx('>', Lower, i + Length(TitleTagBegin));
    if c = 0 then
      Exit;
    f := PosEx(TitleTagEnd, Lower, c + 1);
    if f = 0 then
      Exit;

    Result := Copy(Html, c + 1, f - c - 1);
  finally
    idH.Free;
  end;
end;
Regards
You can also use the Indy TIdHTTP component and use the same approach as RRUZ in his answer.
I have a parser (ATagParser) that makes this kind of thing trivial. It was a commercial product, but I took it off the market several years ago. I still actively use it and develop it and will send it to anyone that asks. It can be used for personal or commercial use as long as credit is given.
BTW, the idea of finding the tags with POS is all well and good, but it will miss title tags with attributes -- and yes, title tags can have attributes (dir, lang etc..)
A title tag with attributes, such as
<title class="notranslate">Title</title>
would fail in the other options given. The function below can also detect titles like that.
Here is the function:
{ Extracts the title text from an HTML document held in a string.

  HTML   - the markup to search.
  Result - the text between the first '>' that follows the first '<title'
           and the next '</title>' after it, or '' when any of those three
           markers is missing. Matching is case-insensitive, and attributes
           on the opening tag are skipped. The text is returned as-is. }
function GetHTMLTitle(const HTML:string):string;
var
  OpenAt    : Integer;
  BracketAt : Integer;
  CloseAt   : Integer;
  Tail      : string;
begin
  Result := '';

  // Locate the opening tag on a lower-cased copy so <TITLE> matches too.
  OpenAt := Pos('<title', LowerCase(HTML));
  if OpenAt <= 0 then
    Exit;

  // Keep everything from the opening tag onwards.
  Tail := Copy(HTML, OpenAt, MaxInt);

  // Drop the opening tag itself, including any attributes it carries.
  BracketAt := Pos('>', Tail);
  if BracketAt <= 0 then
    Exit;
  Delete(Tail, 1, BracketAt);

  // What precedes the closing tag is the title.
  CloseAt := Pos('</title>', LowerCase(Tail));
  if CloseAt <= 0 then
    Exit;

  Result := Copy(Tail, 1, CloseAt - 1);
end;
精彩评论