selenium - Scrapy XPath returning empty list in Python


I don't know what I am doing wrong. I am trying to extract text and store it in a list. In Firebug and FirePath, when I enter the XPath it shows exactly the correct text, but when I apply it in the spider it returns an empty list. I am trying to scrape www.insider.in/mumbai: the spider should follow the event links and scrape the event title, address and other information. Here is the new, edited code:

from scrapy.spider import BaseSpider
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from scrapy.selector import HtmlXPathSelector

import time
import requests
import csv


class InsiderSpider(BaseSpider):
    name = 'insider'
    allowed_domains = ["insider.in"]
    start_urls = ["http://www.insider.in/mumbai/"]

    def parse(self, response):
        driver = webdriver.Firefox()
        print response.url
        driver.get(response.url)
        s = Selector(response)
        # hxs = HtmlXPathSelector(response)
        source_link = []
        temp = []
        title = ""
        price = ""
        venue_name = ""
        venue_address = ""
        description = ""
        event_details = []
        alllinks = s.xpath('//div[@class="bottom-details-right"]//a/@href').extract()
        print alllinks
        length_of_alllinks = len(alllinks)
        for single_event in range(1, length_of_alllinks):
            if "https://insider.in/event" in alllinks[single_event]:
                source_link.append(alllinks[single_event])
                driver.get(alllinks[single_event])
                s = Selector(response)
                # hxs = HtmlXPathSelector(response)
                time.sleep(3)
                title = s.xpath('//div[@class = "cell-title in-headertitle"]/h1//text()').extract()
                print title
                temp = s.xpath('//div[@class = "cell-caption centered in-header"]//h3//text()').extract()
                print temp
                time.sleep(2)
                price_count = len(s.xpath('//div[@class = "bold-caption price"]//text()').extract())
                if price_count > 0:
                    price = s.xpath('//div[@class = "bold-caption price"]//text()').extract()
                    time.sleep(2)
                else:
                    price = "rsvp"
                    time.sleep(2)
                print price
                venue_name = s.xpath('//div[@class = "address"]//div[@class = "section-title"]//text()').extract()
                print venue_name
                venue_address = s.xpath('//div[@class ="address"]//div//text()[preceding-sibling::br]').extract()
                print venue_address
                description = s.xpath('//div[@class="cell-caption accordion-padding"]//text()').extract()
                print description
                time.sleep(5)
                event_details.append([title, temp, price, venue_name, venue_address, description])
            else:
                print "other part"

Edited output:

[u'https://insider.in/weekender-music-festival-2015', u'https://insider.in/event/east-india-comedy-presents-back-benchers#', u'https://insider.in/event/art-of-story-telling', u'https://insider.in/feelings-in-india-with-kanan-gill', u'https://insider.in/event/the-tall-tales-workshop-capture-your-story', u'https://insider.in/halloween-by-the-pier-2015', u'https://insider.in/event/whats-your-story', u'https://insider.in/event/beyond-contemporary-art']
2015-08-03 12:53:29 [selenium.webdriver.remote.remote_connection] DEBUG: POST http://127.0.0.1:60924/hub/session/f675b909-5515-41d4-a89e-d197c296023d/url {"url": "https://insider.in/event/east-india-comedy-presents-back-benchers#", "sessionid": "f675b909-5515-41d4-a89e-d197c296023d"}
2015-08-03 12:53:29 [selenium.webdriver.remote.remote_connection] DEBUG: Finished Request
[]
[]
rsvp
[]
[]
[]
[[[], [], 'rsvp', [], [], []]]

Even the if condition fails, so it prints rsvp. I don't understand what I am doing wrong; I have been stuck on this part for three days. Please help.
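One thing worth checking: inside the loop the Selector is rebuilt from response (the original start URL) even after driver.get() navigates to an event page, so the XPaths never run against the page Selenium actually loaded. Below is a minimal, untested sketch of how the rendered page could be fed into a Scrapy selector if the Selenium route were kept; the event URL is just one of the links from the output above.

from scrapy.selector import Selector
from selenium import webdriver
import time

driver = webdriver.Firefox()
driver.get("https://insider.in/event/art-of-story-telling")  # one of the links printed above
time.sleep(3)  # crude wait for the page to finish rendering

# Build the selector on the HTML the driver actually loaded, not on `response`
page = Selector(text=driver.page_source)
print page.xpath('//div[@class = "cell-title in-headertitle"]/h1//text()').extract()

driver.quit()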

I removed the webdriver parts and got a basic version of the code that works:

import scrapy
import logging
from scrapy.http import Request
from scrapy.selector import Selector


class InsiderSpider(scrapy.Spider):
    name = 'insider'
    allowed_domains = ["insider.in"]
    start_urls = ["http://www.insider.in/mumbai/"]
    event_details = list()  # changed: event_details is now member data of the class

    def parse(self, response):
        source_link = []
        temp = []
        title = ""
        price = ""
        venue_name = ""
        venue_address = ""
        description = ""
        alllinks = response.xpath('//div[@class="bottom-details-right"]//a/@href').extract()
        print alllinks
        for single_event in alllinks:
            if "https://insider.in/event" in single_event:
                yield Request(url=single_event, callback=self.parse_event)
            else:
                print 'other part'

    def parse_event(self, response):
        title = response.xpath('//div[@class = "cell-title in-headertitle"]/h1//text()').extract()
        print title
        temp = response.xpath('//div[@class = "cell-caption centered in-header"]//h3//text()').extract()
        print temp
        price_count = len(response.xpath('//div[@class = "bold-caption price"]//text()').extract())
        if price_count > 0:
            price = response.xpath('//div[@class = "bold-caption price"]//text()').extract()
        else:
            price = "rsvp"
        print price
        venue_name = response.xpath('normalize-space(//div[@class = "address"]//div[@class = "section-title"]//text())').extract()
        print venue_name
        venue_address = response.xpath('normalize-space(//div[@class ="address"]//div//text()[preceding-sibling::br])').extract()
        print venue_address
        description = response.xpath('normalize-space(//div[@class="cell-caption accordion-padding"]//text())').extract()
        print description
        self.event_details.append([title, temp, price, venue_name, venue_address, description])  # note: event_details is used as self.event_details, i.e. the member data
        print self.event_details
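A side note on collecting the results: appending rows to self.event_details only accumulates them in memory and prints them. If the goal is to save them, a more conventional Scrapy pattern is to yield each event as an item (a plain dict is accepted on reasonably recent Scrapy versions; older ones need an Item class) and let the feed exporters write the file. A hedged sketch of parse_event rewritten that way, reusing the same XPaths; the field names are just illustrative:

    # Alternative parse_event for InsiderSpider: yield one dict per event
    # instead of collecting rows in self.event_details.
    def parse_event(self, response):
        price = response.xpath('//div[@class = "bold-caption price"]//text()').extract()
        yield {
            'title': response.xpath('//div[@class = "cell-title in-headertitle"]/h1//text()').extract(),
            'subtitle': response.xpath('//div[@class = "cell-caption centered in-header"]//h3//text()').extract(),
            'price': price if price else "rsvp",
            'venue_name': response.xpath('normalize-space(//div[@class = "address"]//div[@class = "section-title"]//text())').extract(),
            'venue_address': response.xpath('normalize-space(//div[@class ="address"]//div//text()[preceding-sibling::br])').extract(),
            'description': response.xpath('normalize-space(//div[@class="cell-caption accordion-padding"]//text())').extract(),
        }

With that in place, something like scrapy crawl insider -o events.csv (or -o events.json) writes the scraped fields to a file instead of keeping a growing list on the spider.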
