return report
report = "url": url, "final_url": resp.url, "status_code": resp.status_code, "title": title, "meta_description": meta_desc, "og_title": og_title, "og_description": og_desc, "keywords": keywords, "is_adult_content": adult_flag, "content_length_bytes": len(resp.content), xnexx hot
def fetch_url(url: str) -> requests.Response:
    """Perform a GET request with sane defaults.

    The request sends the module-level ``USER_AGENT`` as the ``User-Agent``
    header, follows redirects, downloads the body eagerly (``stream=False``)
    and gives up after ``TIMEOUT_SECONDS``.

    Args:
        url: Address to fetch.

    Returns:
        The response object for a successful (non-4xx/5xx) request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # NOTE(review): USER_AGENT and TIMEOUT_SECONDS are expected to be defined
    # at module level; they are not visible from this function.
    headers = {"User-Agent": USER_AGENT}
    resp = requests.get(
        url,
        headers=headers,
        timeout=TIMEOUT_SECONDS,
        allow_redirects=True,
        stream=False,
    )
    resp.raise_for_status()  # raise HTTPError for 4xx/5xx
    return resp
def extract_meta(soup: BeautifulSoup, name: str) -> str:
    """Return the content of a meta tag (name or property).

    A ``<meta name="...">`` tag is looked up first; if none is found the
    search falls back to ``<meta property="...">`` (the attribute used by
    Open Graph style tags).

    Args:
        soup: Parsed document to search.
        name: Value to match against the tag's ``name`` or ``property``
            attribute.

    Returns:
        The tag's ``content`` attribute, or ``""`` when no tag matches or the
        matching tag has no ``content`` attribute.
    """
    tag = soup.find("meta", attrs={"name": name}) or soup.find(
        "meta", attrs={"property": name}
    )
    return tag["content"] if tag and tag.has_attr("content") else ""
def is_adult_content(text: str) -> bool:
    """Very naive adult-content detection based on keyword presence.

    The check is a case-insensitive *substring* match, so a keyword embedded
    in a longer word also counts as a hit.

    Args:
        text: Text to inspect.

    Returns:
        ``True`` if any entry of ``ADULT_KEYWORDS`` occurs in ``text``,
        ``False`` otherwise (including for an empty string).
    """
    text_low = text.lower()
    return any(word in text_low for word in ADULT_KEYWORDS)
# ----------------------------------------------------------------------
# Configuration – tweak these if you want to broaden / narrow the checks.
# ----------------------------------------------------------------------
# Lower-case keywords; the detector lower-cases its input before matching.
ADULT_KEYWORDS = (
    "porn",
    "xxx",
    "adult",
    "sex",
    "erotic",
    "nude",
    "nsfw",
    "explicit",
    "hardcore",
    "softcore",
    "camgirl",
    "camboy",
)