Try to provide proper Cookies and Content-Type headers within a request, take a look at the below example, it uses MSXML2.ServerXMLHTTP
to manage with cookies:
Option Explicit
Sub scrape_kody_poczta_polska_pl()
Dim sRespHeaders As String
Dim aSetHeaders
Dim sPayload As String
Dim sRespText As String
Dim aRows
Dim aCells
Dim i As Long
Dim j As Long
Dim aData
' Get search page to retrieve cookies
XmlHttpRequest _
"GET", _
"http://kody.poczta-polska.pl/", _
Array(), _
"", _
sRespHeaders, _
""
' Extract cookies
ParseResponse "^Set-(Cookie): (S*?=S*?);[sS]*?$", sRespHeaders, aSetHeaders
' Setup request
sPayload = "kod=20-610&page=kod"
PushItem aSetHeaders, Array("Content-Type", "application/x-www-form-urlencoded")
' Retrieve results
XmlHttpRequest _
"POST", _
"http://kody.poczta-polska.pl/index.php", _
aSetHeaders, _
sPayload, _
"", _
sRespText
' Parse table rows
ParseResponse _
"(<tr>(?:[sS]*?<t[dh]>[sS]*?</t[dh]>)+?[sS]*?</tr>)", _
sRespText, _
aRows
' Parse table cells
For i = 0 To UBound(aRows)
ParseResponse _
"<t[dh]>([sS]*?)</t[dh]>", _
aRows(i), _
aCells, _
False
For j = 0 To UBound(aCells)
aCells(j) = DecodeHTMLEntities((aCells(j)))
Next
aRows(i) = aCells
Next
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
.Cells.HorizontalAlignment = xlCenter
.Cells.VerticalAlignment = xlTop
aData = Denestify(aRows)
If IsArray(aData) Then Output2DArray .Cells(1, 1), aData
End With
End Sub
Sub XmlHttpRequest(sMethod, sUrl, aSetHeaders, sPayload, sRespHeaders, sRespText)
Dim aHeader
With CreateObject("MSXML2.ServerXMLHTTP")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open sMethod, sUrl, False
For Each aHeader In aSetHeaders
.SetRequestHeader aHeader(0), aHeader(1)
Next
.Send sPayload
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData, Optional bAppend As Boolean = True)
Dim oMatch
Dim aTmp()
Dim sSubMatch
If Not (IsArray(aData) And bAppend) Then aData = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp, sSubMatch
Next
PushItem aData, aTmp
End If
Next
End With
End Sub
Sub PushItem(aData, vItem, Optional bAppend As Boolean = True)
If Not (IsArray(aData) And bAppend) Then aData = Array()
ReDim Preserve aData(UBound(aData) + 1)
aData(UBound(aData)) = vItem
End Sub
Function DecodeHTMLEntities(sText As String) As String
Static oHtmlfile As Object
Static oDiv As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.Open
Set oDiv = oHtmlfile.createElement("div")
End If
oDiv.innerHTML = sText
DecodeHTMLEntities = oDiv.innerText
End Function
Function Denestify(aRows)
Dim aData()
Dim aItems()
Dim i As Long
Dim j As Long
If UBound(aRows) = -1 Then Exit Function
ReDim aData(1 To UBound(aRows) + 1, 1 To 1)
For j = 0 To UBound(aRows)
aItems = aRows(j)
For i = 0 To UBound(aItems)
If i + 1 > UBound(aData, 2) Then ReDim Preserve aData(1 To UBound(aRows) + 1, 1 To i + 1)
aData(j + 1, i + 1) = aItems(i)
Next
Next
Denestify = aData
End Function
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
The output for me as follows:
and that is the same as results on the webpage:
I add some variables values below, it may help to debug in case of any issues. To watch the content of sRespHeaders
and sRespText
I used additional procedure WriteTextFile
from this answer.
sRespHeaders
after the first XmlHttpRequest
call (execute WriteTextFile sRespHeaders, "C:mp.txt", -1
):
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Date: Sat, 26 Aug 2017 14:24:48 GMT
Pragma: no-cache
Transfer-Encoding: chunked
Content-Type: text/html; charset=UTF-8
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Server: Apache
Set-Cookie: PHPSESSID=rl4gc6nq91tfb34u2inj634u10; path=/
Set-Cookie: restrwww4=!hN5+tRTsssR9ii3Yf8b335uDNFxhmd5PNCjvCndeUeIwBxZnB38oHuGc9Nz19debb6vLbW1nYQ+Ncgw=; path=/; Httponly
X-Cnection: close
aSetHeaders
after extracting cookies:
Relevant part sRespText
containing a table with target data after the second XmlHttpRequest
call (execute WriteTextFile sRespText, "C:mp.htm", -1
):
<table border="0" width="100%">
<tr>
<th>lp.</th>
<th>kod PNA</th>
<th>nazwa <br />(firmy lub placówki pocztowej)</th>
<th>miejscowo??</th>
<th>adres</th>
<th>województwo</th>
<th>powiat</th>
<th>gmina</th>
</tr>
<tr>
<td>1.</td>
<td>20-610</td>
<td></td>
<td>Lublin</td>
<td> Kajetana Hryniewieckiego <br />
<i>numery od 1 do końca obie strony</i>
</td>
<td>LUBELSKIE</td>
<td>Lublin</td>
<td>Lublin</td>
</tr>
<tr>
<td>2.</td>
<td>20-610</td>
<td></td>
<td>Lublin</td>
<td> Leszka Czarnego <br />
<i>numery od 1 do końca obie strony</i>
</td>
<td>LUBELSKIE</td>
<td>Lublin</td>
<td>Lublin</td>
</tr>
<tr>
<td>3.</td>
<td>20-610</td>
<td></td>
<td>Lublin</td>
<td> Mieszka I <br />
<i>numery od 1 do końca obie strony</i>
</td>
<td>LUBELSKIE</td>
<td>Lublin</td>
<td>Lublin</td>
</tr>
<tr>
<td>4.</td>
<td>20-610</td>
<td></td>
<td>Lublin</td>
<td> Piastowska <br />
<i>numery od 1 do końca obie strony</i>
</td>
<td>LUBELSKIE</td>
<td>Lublin</td>
<td>Lublin</td>
</tr>
</table>
aRows
after parsing table rows:
aRows
after parsing table cells:
aData
after Denestify
call: