For websites with traditional page-based navigation:
# Traditional pagination (page=1, page=2, etc.)
# NOTE: `client` is not defined in this snippet; it is assumed to be an
# already-initialized API client created earlier.
response = client.smartscraper(
    website_url="https://example.com/products?page=1",
    user_prompt="Extract products from this page",
    total_pages=5,
)
For websites with infinite scroll or "Load More" buttons:
# Infinite scroll pagination
# NOTE: `client` is not defined in this snippet; it is assumed to be an
# already-initialized API client created earlier.
response = client.smartscraper(
    website_url="https://example.com/feed",
    user_prompt="Extract all posts from the feed",
    total_pages=1,  # Single page
    number_of_scrolls=10,  # Multiple scrolls to load more content
)
# Two alternative prompt wordings for paginated extraction. Both assign to
# `user_prompt`, so when run top to bottom the second ("Better") one wins;
# pick the variant you want rather than keeping both.

# Good: Specific about pagination context
user_prompt = """Extract all product information from this page and any subsequent pages.
Include: name, price, rating, availability, and image URL.
Ensure you capture all products across multiple pages."""

# Better: Include pagination instructions
user_prompt = """Extract product information from this e-commerce page.
For each product, get: name, price, rating, availability, image URL.
This is page 1 of a multi-page product listing.
Look for pagination controls and extract data from all visible pages."""
import time

# Implement delays between requests
# NOTE: `client` is not defined in this snippet; it is assumed to be an
# already-initialized API client created earlier.
for page in range(1, 6):  # pages 1 through 5
    # `response` is rebound on every iteration; consume or store it inside
    # the loop if the per-page results are needed afterwards.
    response = client.smartscraper(
        website_url=f"https://example.com/products?page={page}",
        user_prompt="Extract products",
        total_pages=1,  # One page at a time
        wait_for=3,  # Wait 3 seconds
    )
    # Additional delay between requests
    time.sleep(2)
# Amazon product scraping
# NOTE: `client` and `ProductCatalog` are not defined in this snippet; they
# are assumed to be created/declared earlier.
response = client.smartscraper(
    website_url="https://amazon.com/s?k=smartphones",
    user_prompt="""
    Extract all smartphone products from this search results page.
    For each product include: name, price, rating, reviews count, availability, and prime eligibility.
    """,
    output_schema=ProductCatalog,
    total_pages=5,
    number_of_scrolls=3,
)
# Twitter/X feed scraping
# NOTE: `client` is not defined in this snippet; it is assumed to be an
# already-initialized API client created earlier.
response = client.smartscraper(
    website_url="https://twitter.com/search?q=AI",
    user_prompt="""
    Extract all tweets from this search results page.
    For each tweet include: author, content, timestamp, likes, retweets, and replies count.
    """,
    total_pages=1,
    number_of_scrolls=15,  # More scrolls for social media
)
# News website scraping
# NOTE: `client` and `NewsFeed` are not defined in this snippet; they are
# assumed to be created/declared earlier.
response = client.smartscraper(
    website_url="https://reuters.com/technology",
    user_prompt="""
    Extract all technology news articles from this page.
    For each article include: headline, summary, author, publication date, and category.
    """,
    output_schema=NewsFeed,
    total_pages=4,
)