I have this code and I can't figure out why it is not yielding requests after the parse method.
def start_requests(self):
    """Yield one POST search request per date window of the historic range.

    The portal is looked up with ``get_portal_by_portal_id(self.portal_id)``
    and its ``'url'`` entry is used as the request URL. When
    ``self.historic == "true"``, the range runs from 1 January of
    ``self.historic_year_start`` to 31 December of ``self.historic_year_end``;
    either year falls back to the current year when the attribute is falsy.

    Nothing is yielded when the portal lookup returns a falsy value or when
    historic mode is not ``"true"``.

    Windows are contiguous and inclusive: the first spans 8 days (as the
    original did), each later one spans 7 days, and the last is clamped to
    the end date so no request reaches past the requested range.
    """
    self.portal = self.get_portal_by_portal_id(self.portal_id)
    if not self.portal:
        return
    url = self.portal.get('url')
    if self.historic != "true":
        return

    today = datetime.date.today()
    start_year = self.historic_year_start or today.year
    end_year = self.historic_year_end or today.year
    start_date = datetime.datetime.strptime(
        f'01-jan-{start_year}', "%d-%b-%Y")
    end_date = datetime.datetime.strptime(
        f'31-dec-{end_year}', "%d-%b-%Y")

    from_date = start_date
    to_date = min(start_date + timedelta(days=7), end_date)
    # `<=` so that a window starting exactly on the last day is still
    # requested (happens in leap years with this stepping).
    while from_date <= end_date:
        to_date_string = to_date.strftime("%d %b %Y").replace(
            ' ', '%2F')
        from_date_string = from_date.strftime("%d %b %Y").replace(
            ' ', '%2F')
        # NOTE(review): `payload` is not defined in the visible code;
        # presumably it is built from the two date strings above - confirm.
        yield scrapy.Request(url,
                             body=payload,
                             headers=self.headers,
                             method='POST',
                             dont_filter=True,
                             callback=self.parse)
        # Start the next window on the day after the current one ends.
        # The previous "+8 days / +6 days" stepping left a one-day gap
        # between consecutive windows.
        # NOTE(review): assumes the portal treats both dates as inclusive
        # - confirm against the search form.
        from_date = to_date + timedelta(days=1)
        to_date = min(from_date + timedelta(days=6), end_date)
"start_requests" works just fine; the problem comes with the next method. I debugged it step by step with pdb, and execution reaches the yield line with a working URL, but the request is never made. I also tried to catch an error with the errback argument, but no error is raised.
def parse(self, response):
    """Queue a detail request for every result link, then follow pagination.

    Each link found in a ``td.apas_tblContent`` cell gets a fresh
    ``PlanningItem`` carrying the running ``current`` / ``total`` counters and
    is requested with ``self.details`` as callback. Pagination links are only
    followed while ``self.flag`` is truthy; the flag is cleared as soon as the
    first pagination request is produced.
    """
    detail_links = response.xpath(
        './/td[@class="apas_tblContent"]/a/@href').extract()
    pager_links = response.xpath('.//form/div/div/p[a]/a/@href').extract()

    self.total += len(detail_links)

    # NOTE(review): the crawl stats posted with this code report
    # 'offsite/filtered': 707 and only the 92 POST requests being downloaded.
    # Presumably the requests built here are dropped by Scrapy's offsite
    # filtering because their host is not covered by the spider's
    # `allowed_domains` - confirm against the spider class.
    for link in detail_links:
        item = PlanningItem()
        self.current += 1
        item['current'] = self.current
        item['total'] = self.total
        yield scrapy.Request(response.urljoin(link),
                             meta={'item': item},
                             callback=self.details)

    if not self.flag:
        return
    for link in pager_links:
        target = response.urljoin(link)
        # Cleared inside the loop so the flag stays set when the page has
        # no pagination links at all.
        self.flag = False
        yield scrapy.Request(target, callback=self.parse)
I debugged up to the parse method, and the URLs that are generated work fine in a GET request from my browser or Postman; the "details" callback is just never called.
These are the final crawl stats:
{'downloader/request_bytes': 109808,
'downloader/request_count': 92,
'downloader/request_method_count/POST': 92,
'downloader/response_bytes': 1066441,
'downloader/response_count': 92,
'downloader/response_status_count/200': 92,
'elapsed_time_seconds': 10.991036,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2021, 1, 7, 3, 13, 15, 673596),
'log_count/DEBUG': 93,
'log_count/INFO': 10,
'log_count/WARNING': 1,
'memusage/max': 83685376,
'memusage/startup': 83685376,
'offsite/domains': 1,
'offsite/filtered': 707,
'request_depth_max': 1,
'response_received_count': 92,
'scheduler/dequeued': 92,
'scheduler/dequeued/memory': 92,
'scheduler/enqueued': 92,
'scheduler/enqueued/memory': 92,
'start_time': datetime.datetime(2021, 1, 7, 3, 13, 4, 682560)}