firecrawl/apps/playwright-service/main.py

42 lines
955 B
Python
Raw Normal View History

2024-04-25 17:31:28 +00:00
from typing import Optional

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from playwright.async_api import async_playwright, Browser
from pydantic import BaseModel
2024-04-25 17:31:28 +00:00
2024-04-15 21:01:47 +00:00
# FastAPI application object; the lifecycle hooks and the /html route in this
# file register themselves on it through decorators.
app = FastAPI()
2024-04-15 21:01:47 +00:00
class UrlModel(BaseModel):
    """Request body accepted by the ``/html`` endpoint."""

    # Address of the page to load.
    url: str
    # Navigation timeout handed to Playwright (milliseconds). Declared
    # Optional so that both an omitted field and an explicit JSON null are
    # accepted; the handler substitutes its own default when this is unset.
    wait: Optional[int] = None
2024-04-15 21:01:47 +00:00
2024-04-25 17:31:28 +00:00
# Browser handle shared by the request handlers. It stays None until the
# startup hook assigns the launched Chromium instance.
browser: Browser = None
@app.on_event("startup")
async def startup_event():
    """Start Playwright and launch the Chromium browser used by the handlers.

    The launched browser is stored in the module-level ``browser`` variable.
    """
    global browser
    driver = await async_playwright().start()
    browser = await driver.chromium.launch()
2024-04-15 21:01:47 +00:00
2024-04-25 17:31:28 +00:00
@app.on_event("shutdown")
async def shutdown_event():
    """Close the module-level browser when the application shuts down."""
    await browser.close()
2024-04-15 21:01:47 +00:00
2024-04-25 17:31:28 +00:00
# Fallback navigation timeout. Playwright expresses timeouts in milliseconds,
# so a bare 15 would abort almost every navigation after 15 ms.
DEFAULT_NAVIGATION_TIMEOUT_MS = 15_000


@app.post("/html")
async def root(body: UrlModel):
    """Load ``body.url`` in a fresh browser context and return its HTML.

    Args:
        body: Request payload. ``body.wait`` is forwarded to Playwright as the
            navigation timeout in milliseconds; when it is missing or 0 the
            default of ``DEFAULT_NAVIGATION_TIMEOUT_MS`` is used instead.

    Returns:
        A ``JSONResponse`` whose body is ``{"content": <page HTML>}``.

    Raises:
        Any error raised by Playwright while navigating or reading the page
        (for example a navigation timeout) propagates to the caller after
        the browser context has been closed.
    """
    context = await browser.new_context()
    try:
        page = await context.new_page()
        await page.goto(
            body.url,
            wait_until="load",
            timeout=body.wait or DEFAULT_NAVIGATION_TIMEOUT_MS,
        )
        page_content = await page.content()
    finally:
        # Always release the context: without this, every failed or timed-out
        # request would leave a context open in the shared browser.
        await context.close()
    return JSONResponse(content={"content": page_content})