0

I want to scrape the `var combinations` JavaScript variable from a web page. My usual scraping method, which looks elements up by their attrs, doesn't work for it. I just want to print out the whole `var combinations` line. This is my code, which doesn't work:

soup = BeautifulSoup(test1.text, 'html.parser')
        data = soup.find_all('combinations ').string

And I want to scrape this:

var combinations = {"94128":{"attributes_values":{"6":"US 4 EU 36"},"attributes":[246],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T036","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'246'"},"94129":{"attributes_values":{"6":"US 4,5 EU 37"},"attributes":[247],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T037","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'247'"},"94130":{"attributes_values":{"6":"US 5 EU 37,5"},"attributes":[248],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T375","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'248'"},"94131":{"attributes_values":{"6":"US 5,5 EU 38"},"attributes":[249],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T038","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'249'"},"94132":{"attributes_values":{"6":"US 6 EU 39"},"attributes":[250],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T039","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'250'"},"94133":{"attributes_values":{"6":"US 6,5 EU 39,5"},"attributes":[251],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T395","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'251'"},"94134":{"attributes_values":{"6":"US 7 EU 40"},"attributes":[252],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T040","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'252'"},"94135":{"attributes_values":{"6":"US 7,5 EU 
40,5"},"attributes":[253],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T405","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'253'"},"94136":{"attributes_values":{"6":"US 8 EU 41,5"},"attributes":[254],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T415","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'254'"},"94137":{"attributes_values":{"6":"US 8,5 EU 42"},"attributes":[255],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T042","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'255'"},"94138":{"attributes_values":{"6":"US 9 EU 42,5"},"attributes":[256],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T425","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'256'"},"94139":{"attributes_values":{"6":"US 9,5 EU 43,5"},"attributes":[257],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T435","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'257'"},"94140":{"attributes_values":{"6":"US 10 EU 44"},"attributes":[258],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T044","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'258'"},"94141":{"attributes_values":{"6":"US 10,5 EU 44,5"},"attributes":[259],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T445","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'259'"},"94142":{"attributes_values":{"6":"US 11 EU 
45"},"attributes":[260],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T045","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'260'"},"94143":{"attributes_values":{"6":"US 11,5 EU 46"},"attributes":[261],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T046","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'261'"},"94144":{"attributes_values":{"6":"US 12 EU 46,5"},"attributes":[262],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T465","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'262'"},"94145":{"attributes_values":{"6":"US 12.5 EU 47"},"attributes":[263],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T047","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'263'"},"94146":{"attributes_values":{"6":"US 13 EU 48"},"attributes":[789],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T048","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'789'"}};
var combinationsFromController = {"94128":{"attributes_values":{"6":"US 4 EU 36"},"attribu
3
  • BeautifulSoup works with HTML/XML, but what you have is text inside JavaScript, and it will not search inside JavaScript. You get the JavaScript as a string, so use string functions to extract it: split(), slicing [start:end], regex, etc. Commented Feb 14, 2020 at 0:34
  • 1
    What is the issue, exactly? Stack Overflow is not a free code writing service. See: tour, How to Ask, help center, meta.stackoverflow.com/questions/261592/…. Commented Feb 14, 2020 at 1:45
  • What exactly is your desired output? Commented Feb 14, 2020 at 12:34

1 Answer 1

2

While it is generally not recommended to parse webpages with regular expressions (see "Using regular expressions to parse HTML: why not?"), in your case you're interested in JavaScript code, not HTML. So using re for this might be a quick and easy solution, for example:

>>> import re
>>> webpage = """some
... other
... javascript
... code
... var combinations = {"example": [1, 2, 3]};
... var combinationsFromController = {"example": [4, 5, 6]};
... some
... other
... javascript
... code"""
>>> re.findall(r"var combinations = .*", webpage)
['var combinations = {"example": [1, 2, 3]};']
>>> re.findall(r"var combinationsFromController = .*", webpage)
['var combinationsFromController = {"example": [4, 5, 6]};']
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.