-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFetchWeb.py
More file actions
15 lines (14 loc) · 786 Bytes
/
FetchWeb.py
File metadata and controls
15 lines (14 loc) · 786 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from bs4 import BeautifulSoup
import requests
def fetch_website_content(url, timeout=30):
    """Fetch a web page and return its visible text, one non-empty line per row.

    Approximates selecting everything in a browser (Ctrl+A) and copying it:
    the HTML is parsed, non-visible ``script``/``style``/``noscript`` elements
    are dropped, and the remaining text is collapsed so that blank lines and
    surrounding whitespace are removed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled server would
            block the caller indefinitely.

    Returns:
        The page text as a single string with lines joined by ``"\\n"``.

    Raises:
        Exception: If the response status code is anything other than 200.
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection failure or when the timeout elapses.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch the website. Status code: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")

    # These elements carry text that a browser never renders, so remove them
    # from the tree before extracting text.
    for hidden_tag in soup(["script", "style", "noscript"]):
        hidden_tag.extract()

    # Separate text nodes with newlines, then keep only lines that still have
    # content once surrounding whitespace is trimmed.
    raw_text = soup.get_text(separator="\n")
    stripped_lines = (line.strip() for line in raw_text.splitlines())
    return "\n".join(line for line in stripped_lines if line)