See social network users cryptocurrency addresses and balances. Powered by transparent blockchains.
https://iseeyour.cash/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
213 lines
6.3 KiB
213 lines
6.3 KiB
from typing import Dict, Optional |
|
|
|
from requests_html import HTML, HTMLSession |
|
|
|
from nitter_scraper.schema import Profile # noqa: I100, I202 |
|
|
|
|
|
def username_cleaner(username: str) -> str: |
|
"""Strips @ symbol from a username. |
|
|
|
Example: |
|
@dgnsrekt -> dgnsrekt |
|
|
|
Args: |
|
username: username with @ symbol to remove. |
|
|
|
Returns: |
|
Username with @ symbol stripped. |
|
""" |
|
return username.replace("@", "") |
|
|
|
|
|
def link_parser(element: HTML) -> str: |
|
"""Gets the first link from an html element |
|
|
|
Used for the profiles website, photo and banner links. |
|
|
|
Args: |
|
element: HTML element with a link to parse. |
|
|
|
Returns: |
|
First link from a collection of links. |
|
""" |
|
return list(element.links)[0] |
|
|
|
|
|
def parse_user_id_from_banner(banner_url: str) -> str: |
|
"""Parses the users id from the users banner photo url. |
|
|
|
The user id can only be parsed from the banner photos url. |
|
|
|
Example: |
|
``` |
|
/pic/profile_banners%2F2474416796%2F1600567028%2F1500x500 -> 2474416796 |
|
^ ^ |
|
| | |
|
---------- |
|
user id section in banner link |
|
``` |
|
|
|
Args: |
|
banner_url: URL of the profiles banner photo. |
|
|
|
Returns: |
|
The target profiles user id. |
|
|
|
""" |
|
return banner_url.split("%2F")[1] |
|
|
|
|
|
def stat_cleaner(stat: str) -> int: |
|
"""Cleans and converts single stat. |
|
|
|
Used for the tweets, followers, following, and likes count sections. |
|
|
|
Args: |
|
stat: Stat to be cleaned. |
|
|
|
Returns: |
|
A stat with commas removed and converted to int. |
|
|
|
""" |
|
return int(stat.replace(",", "")) |
|
|
|
|
|
def profile_parser(elements: Dict) -> Dict: |
|
"""Converts parsed sections to text. |
|
|
|
Cleans and processes a dictionary of gathered html elements. |
|
|
|
Args: |
|
elements: Elements prepared to clean and convert. |
|
|
|
Returns: |
|
A dictionary of element sections cleaned and converted to their finalized types. |
|
|
|
""" |
|
elements["username"] = username_cleaner(elements["username"].text) |
|
|
|
elements["name"] = elements["name"].text |
|
|
|
if elements.get("location"): |
|
elements["location"] = elements["location"].text |
|
|
|
elements["is_verified"] = True if elements.get("is_verified") else False |
|
|
|
elements["is_private"] = True if elements.get("is_private") else False |
|
|
|
if elements.get("biography"): |
|
elements["biography"] = elements["biography"].text |
|
|
|
if elements.get("website"): |
|
elements["website"] = link_parser(elements["website"]) |
|
|
|
if elements.get("profile_photo"): |
|
elements["profile_photo"] = link_parser(elements["profile_photo"]) |
|
|
|
if elements.get("banner_photo"): |
|
elements["banner_photo"] = link_parser(elements["banner_photo"]) |
|
elements["user_id"] = parse_user_id_from_banner(elements["banner_photo"]) |
|
|
|
if elements.get("tweets_count"): |
|
elements["tweets_count"] = stat_cleaner(elements["tweets_count"].text) |
|
|
|
if elements.get("following_count"): |
|
elements["following_count"] = stat_cleaner(elements["following_count"].text) |
|
|
|
if elements.get("followers_count"): |
|
elements["followers_count"] = stat_cleaner(elements["followers_count"].text) |
|
|
|
if elements.get("likes_count"): |
|
elements["likes_count"] = stat_cleaner(elements["likes_count"].text) |
|
|
|
return elements |
|
|
|
|
|
def html_parser(html: HTML) -> Dict: |
|
"""Parses HTML element into individual sections |
|
|
|
Given an html element the html_parser will search for each profile section using |
|
CSS selectors. All parsed html elements are gathered into a dictionary and returned. |
|
|
|
Args: |
|
html: HTML element from a successful nitter profile scraped response. |
|
|
|
Returns: |
|
A dictionary of found elements from the parsed sections. |
|
|
|
""" |
|
elements = {} |
|
|
|
elements["username"] = html.find(".profile-card-username", first=True) |
|
|
|
elements["name"] = html.find(".profile-card-fullname", first=True) |
|
|
|
elements["biography"] = html.find(".profile-bio", first=True) |
|
|
|
elements["location"] = html.find(".profile-location", first=True) |
|
|
|
elements["is_verified"] = html.find( |
|
".profile-card-fullname .icon-container .verified-icon", first=True |
|
) |
|
|
|
elements["is_private"] = html.find( |
|
".profile-card-fullname .icon-container .icon-lock", first=True |
|
) |
|
|
|
elements["profile_photo"] = html.find(".profile-card-avatar", first=True) |
|
|
|
elements["banner_photo"] = html.find(".profile-banner a", first=True) |
|
|
|
elements["website"] = html.find(".profile-website", first=True) |
|
|
|
profile_statlist = html.find(".profile-statlist", first=True) |
|
|
|
elements["tweets_count"] = profile_statlist.find(".posts .profile-stat-num", first=True) |
|
|
|
elements["following_count"] = profile_statlist.find(".following .profile-stat-num", first=True) |
|
|
|
elements["followers_count"] = profile_statlist.find(".followers .profile-stat-num", first=True) |
|
|
|
elements["likes_count"] = profile_statlist.find(".likes .profile-stat-num", first=True) |
|
|
|
elements = {k: v for k, v in elements.items() if v is not None} |
|
|
|
return elements |
|
|
|
|
|
def get_profile( |
|
username: str, not_found_ok: bool = False, address: str = "https://nitter.net" |
|
) -> Optional[Profile]: |
|
"""Scrapes nitter for the target users profile information. |
|
|
|
Args: |
|
username: The target profiles username. |
|
not_found_ok: If not_found_ok is false (the default), a ValueError is raised if the target |
|
profile doesn't exist. If not_found_ok is true, None will be returned instead. |
|
address: The address to scrape profile data from. The default scrape location is |
|
'https://nitter.net' which should be used as a backup. This value will normally be |
|
replaced by the address of a local docker container instance of nitter. |
|
|
|
Returns: |
|
Profile object if successfully scraped, otherwise None. |
|
|
|
Raises: |
|
ValueError: If the target profile does not exist and the not_found_ok argument is false. |
|
|
|
|
|
""" |
|
url = f"{address}/{username}" |
|
session = HTMLSession() |
|
response = session.get(url) |
|
|
|
if response.status_code == 200: # user exists |
|
elements = html_parser(response.html) |
|
parsed_elements = profile_parser(elements) |
|
return Profile.from_dict(parsed_elements) |
|
|
|
if not_found_ok: |
|
return None |
|
|
|
else: |
|
raise ValueError(f'Oops! Either "{username}" does not exist or is private.')
|
|
|