Ausgabe
Versuchen Sie, die Daten zu kratzen, aber die Daten werden überschrieben und sie geben die Daten von nur 2 Seiten in der CSV-Datei an. Empfehlen Sie freundlicherweise eine Lösung dafür. Ich warte auf Ihre Antwort. Wie kann ich das beheben? Gibt es eine Möglichkeit, dann schlagen Sie mir vor, ich denke, aufgrund der For-Schleife überschreiben sie Daten. Danke. Dies ist der Seitenlink https://www.askgamblers.com/online-casinos/countries/uk/
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)
for page in range(1,3):
URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
driver.get(URL)
time.sleep(2)
urls= []
data = []
page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
for link in page_links:
href=link.get_attribute("href")
urls.append(href)
product=[]
for url in urls:
wev={}
driver.get(url)
time.sleep(1)
try:
title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text
except:
pass
wev['Title']=title
soup = BeautifulSoup(driver.page_source,"lxml")
pays=soup.select("div#tabPayments")
for pay in pays:
try:
t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
except:
pass
wev['deposit_method']=t1
try:
t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text").get_text(' ',strip=True)
except:
pass
wev['curriences']=t2
try:
t3=pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
except:
pass
wev['with_drawl method']=t3
try:
t4 = pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
t4 = [i.replace("\n", "") for i in t4 if i.text]
except:
pass
wev['with_drawl_time']=t4
product.append(wev)
df=pd.DataFrame(product)
df.to_csv('casino.csv')
Lösung
Alle Ergebnisse in 1 Datei:
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)
product=[]
for page in range(1,4):
URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
driver.get(URL)
time.sleep(2)
urls= []
data = []
page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
for link in page_links:
href=link.get_attribute("href")
urls.append(href)
for url in urls:
wev={}
driver.get(url)
time.sleep(1)
try:
title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text
except:
pass
wev['Title']=title
soup = BeautifulSoup(driver.page_source,"lxml")
pays=soup.select("div#tabPayments")
for pay in pays:
try:
t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
except:
pass
wev['deposit_method']=t1
try:
t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text").get_text(' ',strip=True)
except:
pass
wev['curriences']=t2
try:
t3=pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
except:
pass
wev['with_drawl method']=t3
try:
t4 = pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
t4 = [i.replace("\n", "") for i in t4 if i.text]
except:
pass
wev['with_drawl_time']=t4
product.append(wev)
df=pd.DataFrame(product)
df.to_csv('casino.csv')
Beantwortet von – krisskad
Antwort geprüft von – Willingham (FixError Volunteer)