Somewhere in the mist i have tangled myself running these code gives me "weird" errors and it seems like i am missing a module but cant seem to get it work even after reading the error messages many times.
Anyone that has a clue on whats wrong here?
Happy new year and thanks in advance!
import requests
from bs4 import BeautifulSoup
import csv
def get_page(url):
response = requests.get(url)
if not response.ok:
print('Server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'lxml')
return soup
def get_detail_data(soup):
try:
product = soup.find('span',{'class':'a-size-large product-title-word-break'}).text
except:
product = ''
try:
price = soup.find('span',{'class':'a-size-medium a-color-price priceBlockBuyingPriceString'}).text.strip()
currency, price = p.split(' ')
except:
currency = ''
price = ''
try:
amount = soup.find('span', class_='a-size-medium a-color-state').find('a').text.strip()
except:
amount = ''
data = {
'product': product,
'price': price,
'currency': currency,
'amount': amount,
}
return data
def get_index_data(soup):
try:
links = soup.find_all('a',class_='a-link-normal a-text-normal')
except:
links = []
urls = [item.get('href') for item in links]
return urls
def write_csv(data, url):
with open('hardware.csv', 'a') as csvfile:
writer = csv.writer(csvfile)
row = [data['title'], data['price'], data['currency'], data['amount'], url]
writer.writerow(row)
def main():
url = 'https://www.amazon.se/s?k=grafikkort&page=1'
products = get_index_data(get_page(url))
for link in products:
data = get_detail_data(get_page(link))
write_csv(data, link)
if __name__ == '__main__':
main()
And the Error messages.
Traceback (most recent call last):
File "scrp.py", line 75, in <module>
main()
File "scrp.py", line 71, in main
data = get_detail_data(get_page(link))
File "scrp.py", line 7, in get_page
response = requests.get(url)
File "/usr/lib/python3/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 519, in request
prep = self.prepare_request(req)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 452, in prepare_request
p.prepare(
File "/usr/lib/python3/dist-packages/requests/models.py", line 313, in prepare
self.prepare_url(url, params)
File "/usr/lib/python3/dist-packages/requests/models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1': No schema supplied. Perhaps you meant http:///ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1?