some time I'm trying to get data from this html table, I tried components paid and free. I tried to do some coding and also got no results. I have a class that throw directly html tables for ClientDataSet, but with this table it does not work. Anyone have any tips on how to get the data in this html table? Or a way to convert it to txt / xls / csv or xml? Follows the code for the table:
WebBrowser1.Navigate('http://site2.aesa.pb.gov.br/aesa/monitoramentoPluviometria.do?metodo=listarMesesChuvasMensais');
WebBrowser1.OleObject.Document.All.Tags('select').Item(0).Value:= '2013';
WebBrowser1.OleObject.Document.All.Tags('select').Item(1).Value:= '7';
WebBrowser1.OleObject.Document.All.Tags('input').Item(1).click;
Memo1.Text:= WebBrowser1.OleObject.Document.All.Tags('table').Item(10).InnerHTML;
Memo1.Lines.SaveToFile('table.html');
The following will extract the data from the HTML table on your target page
and load it into a ClientDataSet.
It's fairly long-winded, perhaps demonstrating that as David said, Delphi
is maybe not the best tool for the job.
On my Form1, I have a TEdit, edValue, for me to key in the value in the first
data row in the HTML table data. I use this as a way to find the table in the
HTML document. I dare say there are better methods, but at least my method should be more robust than hard-coding assumptions about the layout of the document in which the table is embedded that maybe won't survive a change by the page's author.
Broadly, the code works by first finding the HTML table cell using the contents of
my edValue.Text, then finding the table to which the cell belongs, and then
populating the CDS's Fields and data from the table.
The CDS fields are set to 255 characters by default; maybe there's a specification for
the data published on the web page that would allow you to use a smaller value for some, if not all, fields. They're all assumed to be of type ftString, to avoid the code choking on unexpected cell contents.
Btw, at the bottom is a utility function for saving the HTML page locally, to
save having to keep clicking the button for selecting a year + month. To reload
the WebBrowser from the saved file, just use the file's name as the URL to load.
TForm1 = class(TForm)
[ ... ]
public
{ Public declarations }
Doc : IHtmlDocument2;
procedure TForm1.btnFindValueClick(Sender: TObject);
var
Table : IHTMLTable;
begin
Doc := WebBrowser1.Document as IHTMLDocument2;
Table := FindTableByCellValue(edValue.Text);
Assert(Table <> Nil);
LoadCDSFromHTMLTable(CDS, Table);
end;
procedure TForm1.LoadCDSFromHTMLTable(DestCDS : TClientDataSet; Table : IHTMLTable);
var
I,
J : Integer;
vTable : OleVariant;
iRow : IHTMLTableRow;
FieldName,
FieldValue : String;
Field : TField;
const
cMaxFieldSize = 255;
scIDFieldName = 'ID';
begin
// Use OleVariant instead of IHTMLTable becuse it's less fiddly for doing what follows
vTable := Table;
Assert(not DestCDS.Active and (DestCDS.FieldCount = 0));
// First create an AutoInc field
Field := TAutoIncField.Create(Self);
Field.FieldName := scIDFieldName;
Field.DataSet := DestCDS;
// Next create CDS fields from the names in the cells in the first row of the table
for I := 0 to (vTable.Rows.Item(0).Cells.Length - 1) do begin
FieldName := vTable.Rows.Item(0).Cells.Item(I).InnerText;
Field := TStringField.Create(Self);
// At this point, we might want to clean up the FieldName by removing embedded spaces, etc
Field.FieldName := FieldName;
Field.Size := cMaxFieldSize;
Field.DataSet := DestCDS;
end;
DestCDS.DisableControls;
try
DestCDS.IndexFieldNames := scIDFieldName;
DestCDS.CreateDataSet;
// Next load the HTML table data into the CDS
for I := 1 to (vTable.Rows.Length - 1) do begin
DestCDS.Insert;
for J := 0 to vTable.Rows.Item(0).Cells.Length - 1 do begin
FieldValue := vTable.Rows.Item(I).Cells.Item(J).InnerText;
// the J + 1 is because Fields[0] is the autoinc one
DestCDS.Fields[J + 1].AsString := FieldValue;
end;
DestCDS.Post;
end;
DestCDS.First;
finally
DestCDS.EnableControls;
end;
end;
function TForm1.FindTableCellByTagValue(Doc : IHtmlDocument2; const AValue : String) : IHTMLTableCell;
var
All: IHTMLElementCollection;
Value: String;
I,
Len: Integer;
E: OleVariant;
iE : IHTMLElement;
iT : IHTMLTextElement;
iC : IHTMLTableCell;
begin
Result := Nil;
All := Doc.All;
if All = Nil then Exit;
Len := All.Length;
for I := 0 to Len - 1 do begin
E := All.Item(I, varEmpty);
iE := IDispatch(E) as IHTMLElement;
if Supports(iE, IHTMLTableCell, iC) then begin
Value := Trim(iE.Get_InnerText);
if Pos(Trim(AValue), Value) = 1 then begin
Result := iC;
Break;
end
end
else
Continue;
end;
end;
function TForm1.FindTableByCellValue(Value : String): IHTMLTable;
var
Node : IHtmlElement;
iTable : IHTMLTable;
iCell : IHTMLTableCell;
begin
Result := Nil;
iCell := FindTableCellByTagValue(Doc, edValue.Text);
if iCell = Nil then
Exit;
Node := IDispatch(iCell) as IHtmlElement;
// if we found a Node with the cell text we were looking for,
// we can now find the HTML table to which it belongs
while Node <> Nil do begin
Node := Node.parentElement;
if Supports(Node, IHTMLTable, iTable) then begin
Result := iTable;
Break;
end;
end;
end;
procedure TForm1.SaveFileLocally(const FileName : String);
var
PFile: IPersistFile; // declared in ActiveX unit
begin
PFile := Doc as IPersistFile;
PFile.Save(StringToOleStr(FileName), False);
end;
after some time studying I finally extract data from html table. To simplify I can extract data from html table directly, without having to 'parse' it was the tag 'table' and 'item' 11 the 'item' 10 had the same data but in a single cell. So what I did, I took each element of the table in html and StringGrid filled one, and then found a way to directly populate the dbgrid through ClientDataSet. I'll post the code (unit) to stand as an example and for that you need someone. I wanted to thank everyone who helped me in the comments. With more study'm seeing that the best way to do this procedure is to MSHTML.
unit Unit1;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.OleCtrls, SHDocVw, Vcl.StdCtrls,
Vcl.Grids, Vcl.DBGrids, Data.DB, Datasnap.DBClient;
type
TForm1 = class(TForm)
WebBrowser1: TWebBrowser;
DBGrid1: TDBGrid;
StringGrid1: TStringGrid;
Button1: TButton;
Button2: TButton;
ClientDataSet1: TClientDataSet;
DataSource1: TDataSource;
ClientDataSet1MunicípioPosto: TStringField;
ClientDataSet1TotalMensalmm: TStringField;
ClientDataSet1ClimatologiaMensalmm: TStringField;
ClientDataSet1Desviomm: TStringField;
ClientDataSet1Desvio: TStringField;
ClientDataSet1id: TAutoIncField;
procedure FormCreate(Sender: TObject);
procedure Button1Click(Sender: TObject);
procedure Button2Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R *.dfm}
procedure TForm1.Button1Click(Sender: TObject);
var
irow, jcol: Integer;
ovTable: OleVariant;
begin
ovTable := WebBrowser1.OleObject.Document.all.tags('table').item(11);
ShowMessage('Number of Rows: '+IntToStr(ovTable.Rows.Length));
ShowMessage('Number of Cols: '+IntToStr(ovTable.Rows.Item(0).Cells.Length));
StringGrid1.RowCount:= ovTable.Rows.Length+1;
StringGrid1.ColCount:= ovTable.Rows.Item(0).Cells.Length+1;
for irow := 0 to (ovTable.Rows.Length - 1) do
begin
for jcol := 0 to (ovTable.Rows.Item(irow).Cells.Length - 1) do
begin
StringGrid1.Cells[jcol+1, irow+1] := ovTable.Rows.Item(irow).Cells.Item(jcol).InnerText;
end;
end;
end;
procedure TForm1.Button2Click(Sender: TObject);
var
iRow : Integer;
iCol : Integer;
ovTable: OleVariant;
begin
ovTable := WebBrowser1.OleObject.Document.all.tags('table').item(11);
for iRow := 1 to (ovTable.Rows.Length - 1) do
begin
ClientDataSet1.Open;
ClientDataSet1.insert;
for iCol := 0 to (ovTable.Rows.Item(iRow).Cells.Length - 1) do
begin
ClientDataSet1.FieldByname('Município/Posto').AsString:=ovTable.Rows.Item(iRow).Cells.Item(0).InnerText;
ClientDataSet1.FieldByname('Total Mensal (mm)').AsString:=ovTable.Rows.Item(iRow).Cells.Item(1).InnerText;
ClientDataSet1.FieldByname('Climatologia Mensal (mm)').AsString:=ovTable.Rows.Item(iRow).Cells.Item(2).InnerText;
ClientDataSet1.FieldByname('Desvio (mm)').AsString:=ovTable.Rows.Item(iRow).Cells.Item(3).InnerText;
ClientDataSet1.FieldByname('Desvio (%)').AsString:=ovTable.Rows.Item(iRow).Cells.Item(4).InnerText;
end;
ClientDataSet1.Post;
ClientDataSet1.IndexFieldNames:= 'id';
ClientDataSet1.First;
end;
end;
procedure TForm1.FormCreate(Sender: TObject);
begin
WebBrowser1.Navigate('C:\htmlwiththetable.html');
end;
end
.