The LangChain integration (the `langchain_scrapegraph` package) enables your agents to extract structured data from websites using natural language. Combining LangChain agents with these scraping tools allows you to build sophisticated AI agents that can understand and process web content intelligently.
Extract structured data from any webpage using natural language prompts:
from langchain_scrapegraph.tools import SmartScraperTool

# Initialize the tool (uses SGAI_API_KEY from environment)
tool = SmartScraperTool()

# Extract information using natural language: the input is a dict holding
# the page to read ("website_url") and the instruction ("user_prompt").
result = tool.invoke({
    "website_url": "https://www.example.com",
    "user_prompt": "Extract the main heading and first paragraph",
})
Define the structure of the output using Pydantic models:
from typing import List

from pydantic import BaseModel, Field

from langchain_scrapegraph.tools import SmartScraperTool


# Schema for the structured output. Each Field description states what
# the corresponding attribute should contain.
class WebsiteInfo(BaseModel):
    title: str = Field(description="The main title of the webpage")
    description: str = Field(description="The main description or first paragraph")
    urls: List[str] = Field(description="The URLs inside the webpage")


# Initialize with schema
tool = SmartScraperTool(llm_output_schema=WebsiteInfo)

# Same input shape as the schema-less call: target page plus instruction.
request = {
    "website_url": "https://www.example.com",
    "user_prompt": "Extract the website information",
}
result = tool.invoke(request)
from langchain_scrapegraph.tools import SearchScraperTool

# Unlike the SmartScraperTool examples, the input here carries only a
# "user_prompt" -- no "website_url" is supplied.
tool = SearchScraperTool()

query = {
    "user_prompt": "Find the best restaurants in San Francisco",
}
result = tool.invoke(query)
# NOTE(review): Optional is imported but not used in this snippet.
from typing import Optional

from pydantic import BaseModel, Field

from langchain_scrapegraph.tools import SearchScraperTool


# Schema for the structured output. Each Field description states what
# the corresponding attribute should contain.
class RestaurantInfo(BaseModel):
    name: str = Field(description="The restaurant name")
    address: str = Field(description="The restaurant address")
    rating: float = Field(description="The restaurant rating")


# Pass the schema at construction time; the input is still prompt-only.
tool = SearchScraperTool(llm_output_schema=RestaurantInfo)

query = {
    "user_prompt": "Find the best restaurants in San Francisco"
}
result = tool.invoke(query)
Create a research agent that can gather and analyze web data:
from langchain.agents import initialize_agent, AgentType
from langchain_scrapegraph.tools import SmartScraperTool
from langchain_openai import ChatOpenAI

# NOTE(review): initialize_agent and .run() may be deprecated in newer
# LangChain releases -- confirm against the installed version.

# Initialize tools
tools = [SmartScraperTool()]

# Create an agent: a zero-shot ReAct agent driven by a temperature-0 chat
# model, with verbose output enabled.
llm = ChatOpenAI(temperature=0)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Use the agent
task = """ Visit example.com, make a summary of the content and extract the main heading and first paragraph"""
response = agent.run(task)