from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from datetime import datetime
from scrapegraph_py import Client
# Schema for company profile data
class CompanyProfile(BaseModel):
    """Profile of a single company listed among an industry's key players.

    Only ``name`` is mandatory. Every other attribute is optional and falls
    back to ``None`` when the scraped page does not provide it.
    """

    name: str = Field(description="Company name")
    # An ``Optional[...]`` annotation alone only permits ``None`` as a value;
    # without an explicit default the key itself is still required under
    # Pydantic v2. ``default=None`` lets a missing key pass validation.
    market_share: Optional[float] = Field(
        default=None, description="Market share percentage"
    )
    revenue: Optional[str] = Field(default=None, description="Annual revenue")
    employees: Optional[str] = Field(
        default=None, description="Number of employees"
    )
    headquarters: Optional[str] = Field(
        default=None, description="Company headquarters"
    )
    key_products: Optional[List[str]] = Field(
        default=None, description="Key products or services"
    )
# Schema for market metrics
class MarketMetrics(BaseModel):
    """Quantitative description of a market: size, growth and segmentation.

    ``cagr`` is the only optional attribute and defaults to ``None``; all
    other attributes must be present in the validated data.
    """

    size: str = Field(description="Total market size")
    growth_rate: float = Field(description="Annual growth rate percentage")
    # Explicit ``default=None``: under Pydantic v2 an ``Optional`` annotation
    # without a default still makes the key required.
    cagr: Optional[float] = Field(
        default=None, description="Compound Annual Growth Rate"
    )
    forecast_period: str = Field(description="Market forecast period")
    # Maps a segment name to its share value.
    segments: Dict[str, float] = Field(
        description="Market segments and their shares"
    )
# Schema for industry analysis
class IndustryAnalysis(BaseModel):
    """Top-level schema for one industry sector analysis.

    Combines the sector identity, its ``MarketMetrics``, a list of
    ``CompanyProfile`` entries and several free-text lists. ``sector``,
    ``market_metrics``, ``trends`` and ``key_players`` are mandatory; the
    remaining attributes default to ``None`` when absent.
    """

    sector: str = Field(description="Industry sector name")
    # Optional attributes carry ``default=None`` because, under Pydantic v2,
    # ``Optional[...]`` without a default still requires the key to be present.
    subsector: Optional[str] = Field(
        default=None, description="Industry subsector"
    )
    market_metrics: MarketMetrics = Field(
        description="Market size and growth metrics"
    )
    trends: List[str] = Field(description="Key industry trends")
    key_players: List[CompanyProfile] = Field(
        description="Major companies in the sector"
    )
    challenges: Optional[List[str]] = Field(
        default=None, description="Industry challenges"
    )
    opportunities: Optional[List[str]] = Field(
        default=None, description="Growth opportunities"
    )
    technologies: Optional[List[str]] = Field(
        default=None, description="Emerging technologies"
    )
    regulations: Optional[List[str]] = Field(
        default=None, description="Key regulations and policies"
    )
# NOTE(review): no API key is passed here, so the client presumably picks it
# up from the environment -- confirm against the installed scrapegraph_py.
client = Client()

# Collect industry analysis data
raw_response = client.smartscraper(
    website_url="https://industry-research-site.com/sector-analysis",
    user_prompt="Extract comprehensive industry analysis including detailed market metrics, company profiles, trends, and regulatory factors. Focus on quantitative data where available.",
    output_schema=IndustryAnalysis,
)

# NOTE(review): scrapegraph_py's smartscraper appears to return the API's raw
# JSON payload (a dict with the extracted data under "result") rather than an
# IndustryAnalysis instance -- confirm for the SDK version in use. Attribute
# access such as ``response.sector`` needs a model, so unwrap the payload and
# validate it against the schema. A response that is already a model is kept
# as is.
if isinstance(raw_response, dict):
    raw_response = raw_response.get("result", raw_response)
response = (
    raw_response
    if isinstance(raw_response, IndustryAnalysis)
    else IndustryAnalysis.model_validate(raw_response)
)
# Generate insights report
# Prints a plain-text summary of ``response`` to stdout, section by section.
# Optional values are skipped when they are absent.
print(f"Industry Analysis: {response.sector}")
if response.subsector:
    print(f"Subsector: {response.subsector}")

metrics = response.market_metrics
print("\nMarket Overview:")
print(f"Size: {metrics.size}")
print(f"Growth Rate: {metrics.growth_rate}%")
# Compare against None rather than relying on truthiness: a CAGR of 0.0 is a
# real value and must still be reported.
if metrics.cagr is not None:
    print(f"CAGR: {metrics.cagr}%")
print(f"Forecast Period: {metrics.forecast_period}")

print("\nMarket Segments:")
for segment, share in metrics.segments.items():
    print(f"- {segment}: {share}%")

print("\nKey Players:")
for player in response.key_players:
    print(f"\nCompany: {player.name}")
    # Same reasoning as for CAGR: a 0.0 market share is valid data.
    if player.market_share is not None:
        print(f"Market Share: {player.market_share}%")
    if player.revenue:
        print(f"Revenue: {player.revenue}")
    if player.key_products:
        print(f"Key Products: {', '.join(player.key_products)}")

print("\nIndustry Trends:")
for trend in response.trends:
    print(f"- {trend}")

# The remaining sections are optional; an absent or empty list suppresses the
# heading as well as the items.
if response.technologies:
    print("\nEmerging Technologies:")
    for tech in response.technologies:
        print(f"- {tech}")

if response.regulations:
    print("\nKey Regulations:")
    for reg in response.regulations:
        print(f"- {reg}")