[FIXED] Daten werden in DataFrame überschrieben

Ausgabe

Ich versuche, die Daten zu scrapen, aber sie werden überschrieben, sodass die CSV-Datei am Ende nur die Daten von 2 Seiten enthält. Ich vermute, dass die Daten wegen der For-Schleife überschrieben werden. Wie kann ich das beheben? Können Sie freundlicherweise eine Lösung vorschlagen? Danke. Dies ist der Seitenlink: https://www.askgamblers.com/online-casinos/countries/uk/

# --- Original script from the question, kept as-is to show the reported bug. ---
# NOTE(review): two defects cause the "data gets overwritten" symptom:
#   1. `product = []` is re-created INSIDE the page loop, discarding the rows
#      collected on earlier pages.
#   2. `df.to_csv('casino.csv')` is called INSIDE the page loop, so each page
#      rewrites the whole file; only the last page's data survives.
# The corrected version is shown in the solution below.
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 


# Iterate result pages 1..2 of the casino listing.
for page in range(1,3):             
    URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    urls= []
    data = []


    # Collect the per-casino review links shown on this listing page.
    page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        href=link.get_attribute("href")
        urls.append(href)
        
    # BUG: accumulator re-initialised on every page iteration — rows from
    # the previous page are thrown away here.
    product=[]
  
     
    for url in urls:
        wev={}
        driver.get(url)
        time.sleep(1)


        try:
            title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text   
        except:
            pass
        
        # NOTE(review): if the lookup above failed, `title` is unbound on the
        # first URL (NameError) or stale from the previous casino afterwards.
        wev['Title']=title

        soup = BeautifulSoup(driver.page_source,"lxml")

        pays=soup.select("div#tabPayments")

        for pay in pays:
            
            try:
                t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
            except:
                pass
            
            wev['deposit_method']=t1
            
            
            try:
                t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['curriences']=t2
            
            try:
                t3=pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['with_drawl method']=t3
            
            try:
                t4 = pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
                t4 = [i.replace("\n", "") for i in t4 if i.text]
                
            except:
                pass
            
            wev['with_drawl_time']=t4
            
            product.append(wev)
            
    # BUG: the CSV is (re)written inside the page loop, so every page
    # overwrites the file produced by the page before it.
    df=pd.DataFrame(product)
    df.to_csv('casino.csv') 

Lösung

Alle Ergebnisse in 1 Datei:

# Scrape casino review data from askgamblers.com (UK listing) into one CSV.
#
# Fix for the original question: the row accumulator `product` is created
# ONCE before the page loop, and the CSV is written ONCE after it, so all
# pages end up in the same file instead of overwriting each other.
#
# Review fixes on top of the accepted answer:
#   - Each record starts from None defaults, so a failed lookup leaves None
#     instead of raising NameError on the first URL or silently reusing the
#     previous casino's value (the bare `except: pass` blocks did both).
#   - `product.append(wev)` moved out of the `for pay in ...` loop: one row
#     per casino even when the payments tab is missing or matched twice.
#   - Duplicate (`Service`, `By`) and unused (`EC`, `csv.writer`) imports
#     removed; `except Exception` instead of bare `except`.
#   - `driver.quit()` releases the browser; `index=False` drops the spurious
#     index column from the CSV.
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.wait import WebDriverWait


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)

# Single accumulator for ALL pages -- creating this inside the page loop
# was the cause of the "data gets overwritten" symptom.
product = []

try:
    for page in range(1, 4):  # listing pages 1..3
        URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
        driver.get(URL)
        time.sleep(2)

        # Collect the per-casino review links shown on this listing page.
        urls = []
        page_links = driver.find_elements(
            By.XPATH,
            "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
        for link in page_links:
            urls.append(link.get_attribute("href"))

        for url in urls:
            driver.get(url)
            time.sleep(1)

            # Clean defaults per record: a failed selector leaves None
            # rather than bleeding data from the previous casino.
            # (Misspelled keys kept verbatim for CSV-column compatibility.)
            wev = {
                'Title': None,
                'deposit_method': None,
                'curriences': None,
                'with_drawl method': None,
                'with_drawl_time': None,
            }

            try:
                wev['Title'] = driver.find_element(
                    By.CSS_SELECTOR, "h1.review-intro__title").text
            except Exception:
                pass  # title missing on this page; keep None

            soup = BeautifulSoup(driver.page_source, "lxml")

            for pay in soup.select("div#tabPayments"):
                try:
                    wev['deposit_method'] = pay.select_one(
                        ".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text"
                    ).get_text(' ', strip=True)
                except Exception:
                    pass

                try:
                    wev['curriences'] = pay.select_one(
                        ".review-details-wrapper:nth-child(1) .review-details__item+ .review-details__item .review-details__text"
                    ).get_text(' ', strip=True)
                except Exception:
                    pass

                try:
                    wev['with_drawl method'] = pay.select_one(
                        " .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(1) .review-details__text"
                    ).get_text(' ', strip=True)
                except Exception:
                    pass

                try:
                    node = pay.select_one(
                        " .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
                    # One cleaned string per child node that has text.
                    wev['with_drawl_time'] = [
                        i.replace("\n", "") for i in node if i.text]
                except Exception:
                    pass

            # Exactly one row per casino, even if the payments tab is absent.
            product.append(wev)
finally:
    driver.quit()  # always release the browser process

# Write everything ONCE, after all pages have been scraped.
df = pd.DataFrame(product)
df.to_csv('casino.csv', index=False)


Beantwortet von –
krisskad


Antwort geprüft von –
Willingham (FixError Volunteer)

0 Shares:
Leave a Reply

Your email address will not be published. Required fields are marked *

You May Also Like