You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
 
 

152 lines
4.7 KiB

from bs4 import BeautifulSoup
import re
import requests
import sys
def _text_or(tag, fallback):
    """Return ``tag.text`` when *tag* was found, otherwise *fallback*."""
    return tag.text if tag is not None else fallback


def _image_variants(src, assumed_suffix):
    """Build the low/medium/high resolution URLs for one image ``src``.

    A trailing ``s-l<digits>.jpg`` size token is stripped to obtain the base
    URL. If ``src`` does not end in such a token, as many trailing characters
    as *assumed_suffix* has are dropped instead.
    """
    size_token = re.search(r"s-l\d+\.jpg\Z", src)
    if size_token:
        base_src = src[: size_token.start()]
    else:
        # Unrecognised URL shape: blindly drop the assumed suffix length.
        base_src = src[: len(src) - len(assumed_suffix)]
    return {
        "lowres": base_src + "s-l64.jpg",
        "medres": base_src + "s-l300.jpg",
        "highres": base_src + "s-l1600.jpg",
    }


def article(identifier, locale="com"):
    """Scrape a single eBay listing page into a plain dictionary.

    Args:
        identifier: Path fragment appended after ``/itm/`` in the listing URL.
        locale: Domain suffix of the eBay site, e.g. ``"com"`` for
            ``www.ebay.com``.

    Returns:
        A dict with the keys ``title``, ``images``, ``price``, ``shipping``,
        ``returns``, ``delivery``, ``payment_options``, ``condition``,
        ``description`` and ``link``. Text fields that cannot be located on
        the page hold an ``"Error, ..."`` placeholder string. ``images`` is a
        list of ``{"lowres", "medres", "highres"}`` URL dicts and
        ``payment_options`` a list of ``{"title": ...}`` dicts; both may be
        empty.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    article_link = "https://www.ebay." + locale + "/itm/" + identifier
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(article_link, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")

    title_tag = soup.find("h1", id="itemTitle")
    if title_tag is not None:
        # NOTE(review): assumes the heading text starts with the
        # "Details about " prefix; that many characters are always dropped.
        title = title_tag.text[len("Details about "):]
    else:
        title = "Error, could not scrape title"

    images = []
    thumbnail_list = soup.find("ul", class_="lst icon")
    if thumbnail_list is not None:
        for thumbnail in thumbnail_list.find_all("img"):
            src = thumbnail.get("src")
            if src:
                images.append(_image_variants(src, "s-l64.jpg"))
    else:
        # Sometimes an article has just one image; then there are no small
        # preview images, only the mid-sized main image.
        main_image = soup.find("img", id="icImg")
        if main_image is not None:
            src = main_image.get("src")
            if src:
                images.append(_image_variants(src, "s-l300.jpg"))

    price = _text_or(
        soup.find("span", id="prcIsum"), "Error, unable to scrape price"
    )
    condition = _text_or(
        soup.find("div", id="vi-itm-cond"), "Error, unable to scrape condition"
    )
    shipping = _text_or(
        soup.find("span", id="fshippingCost"), "Error, could not scrape shipping"
    )
    # TODO: extract delivery details more accurately
    delivery = _text_or(
        soup.find("span", id="delSummary"), "Error, unable to scrape delivery"
    )

    # For these two fields an element that exists but has empty text also
    # yields the placeholder, and a missing element no longer raises.
    description = (
        _text_or(soup.find("div", id="vi-desc-maincntr"), "")
        or "Error, unable to scrape description"
    )
    returns = (
        _text_or(soup.find("span", id="vi-ret-accrd-txt"), "")
        or "Error, unabe to scrape returns"
    )

    payment_options = []
    payment_box = soup.find("div", id="payDet1")
    if payment_box is not None:
        payment_options = [
            {"title": logo["title"]}
            for logo in payment_box.find_all("img")
            if logo.has_attr("title")
        ]

    return {
        "title": title,
        "images": images,
        "price": price,
        "shipping": shipping,
        "returns": returns,
        "delivery": delivery,
        "payment_options": payment_options,
        "condition": condition,
        "description": description,
        "link": article_link,
    }
def search(term, page=1, locale="com"):
    """Scrape one page of eBay search results for *term*.

    Args:
        term: Search text. It is concatenated into the query string verbatim,
            so the caller is responsible for passing a URL-safe value.
        page: Result page to request. The default of 1 requests the plain
            search URL; any other value is sent as the ``_pgn`` query
            parameter.
        locale: Domain suffix of the eBay site, e.g. ``"com"`` for
            ``www.ebay.com``.

    Returns:
        A list of dicts, one per ``li.s-item`` entry that yielded at least one
        field. Each dict contains only the keys that could be scraped, out of
        ``title``, ``price``, ``secondary_info``, ``image``, ``hotness``,
        ``shipping``, ``location``, ``link`` and ``identifier``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    search_link = "https://www.ebay." + locale + "/sch/i.html?_nkw=" + term
    if page != 1:
        # Only non-default pages change the URL, so existing callers that
        # rely on the default keep requesting exactly the same address.
        search_link += "&_pgn=" + str(page)
    item_prefix = "https://www.ebay." + locale + "/itm/"

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(search_link, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")

    def put_text(target, key, tag):
        """Store ``tag.text`` under *key* when the element was found."""
        if tag is not None:
            target[key] = tag.text

    results = []
    for entry in soup.find_all("li", class_="s-item"):
        result = {}
        put_text(result, "title", entry.find("h3", class_="s-item__title"))
        put_text(result, "price", entry.find("span", class_="s-item__price"))
        put_text(
            result, "secondary_info", entry.find("span", class_="SECONDARY_INFO")
        )

        image = entry.find("img", class_="s-item__image-img")
        if image is not None and image.has_attr("src"):
            # An <img> without src is skipped instead of raising KeyError.
            result["image"] = image["src"]

        put_text(result, "hotness", entry.find("span", class_="s-item__hotness"))
        put_text(result, "shipping", entry.find("span", class_="s-item__shipping"))
        put_text(result, "location", entry.find("span", class_="s-item__location"))

        link = entry.find("a", class_="s-item__link")
        if link is not None and link.has_attr("href"):
            result["link"] = link["href"]
            # NOTE(review): assumes the href starts with the locale's
            # ".../itm/" prefix; that many characters are always dropped.
            result["identifier"] = result["link"][len(item_prefix):]

        if result:
            results.append(result)
    return results
if __name__ == "__main__":
    # Manual smoke run: fetch one hard-coded listing; the result is discarded.
    demo_identifier = "LEGO-Technic-42069-Extreme-Adventure-Truck-NEW-and-SEALED-RARE/284135418987/?hash=item4227ca6c6b:g:4tIAAOSwqqdf8Kr5"
    article(demo_identifier)