ThreadScraper¶

Scrapes and unrolls Twitter/X threads into a complete conversation.

Import¶

from xeepy.scrapers.thread import ThreadScraper

Class Signature¶

class ThreadScraper:
    def __init__(
        self,
        browser_manager: BrowserManager,
        rate_limiter: Optional[RateLimiter] = None
    )

Parameters¶

Parameter	Type	Default	Description
`browser_manager`	`BrowserManager`	Required	Browser manager instance
`rate_limiter`	`Optional[RateLimiter]`	`None`	Rate limiter instance

Methods¶

Method	Returns	Description
`scrape(tweet_url)`	`Thread`	Unroll a thread
`scrape_from_tweet(tweet_id)`	`Thread`	Unroll from tweet ID
`is_thread(tweet_url)`	`bool`	Check if tweet is part of thread
`get_thread_length(tweet_url)`	`int`	Count tweets in thread

`scrape`¶

async def scrape(
    self,
    tweet_url: str,
    include_media: bool = True
) -> Thread

Unroll a complete thread from any tweet in the thread.

Parameters: - tweet_url: URL of any tweet in the thread - include_media: Include media attachments

Returns: Thread object containing all tweets in order

`scrape_from_tweet`¶

async def scrape_from_tweet(
    self,
    tweet_id: str,
    include_media: bool = True
) -> Thread

Unroll thread using tweet ID.

`is_thread`¶

async def is_thread(self, tweet_url: str) -> bool

Check if a tweet is part of a thread.

`get_thread_length`¶

async def get_thread_length(self, tweet_url: str) -> int

Get the number of tweets in a thread without full scrape.

Thread Object¶

@dataclass
class Thread:
    author: User                     # Thread author
    tweets: List[Tweet]              # Ordered list of tweets
    total_tweets: int                # Number of tweets
    created_at: datetime             # First tweet timestamp
    total_likes: int                 # Sum of all likes
    total_retweets: int              # Sum of all retweets
    total_replies: int               # Sum of all replies

    def to_text(self) -> str:
        """Convert thread to readable text."""
        pass

    def to_markdown(self) -> str:
        """Convert thread to markdown format."""
        pass

Usage Examples¶

Basic Thread Unrolling¶

from xeepy import Xeepy

async def main():
    async with Xeepy() as x:
        thread = await x.scrape.thread(
            "https://x.com/user/status/123456789"
        )

        print(f"Thread by @{thread.author.username}")
        print(f"Total tweets: {thread.total_tweets}")
        print("=" * 50)

        for i, tweet in enumerate(thread.tweets, 1):
            print(f"\n[{i}/{thread.total_tweets}]")
            print(tweet.text)
            print(f"❤️ {tweet.like_count}")

asyncio.run(main())

Export Thread as Text¶

from xeepy import Xeepy

async def main():
    async with Xeepy() as x:
        thread = await x.scrape.thread(
            "https://x.com/user/status/123456789"
        )

        # Export as plain text
        text = thread.to_text()
        with open("thread.txt", "w") as f:
            f.write(text)

        # Export as markdown
        markdown = thread.to_markdown()
        with open("thread.md", "w") as f:
            f.write(markdown)

asyncio.run(main())

Check Thread Before Scraping¶

from xeepy import Xeepy

async def main():
    async with Xeepy() as x:
        url = "https://x.com/user/status/123"

        if await x.scrape.is_thread(url):
            length = await x.scrape.thread_length(url)
            print(f"This is a thread with {length} tweets")

            thread = await x.scrape.thread(url)
            # Process thread...
        else:
            print("This is a single tweet")

asyncio.run(main())

Thread Analytics¶

from xeepy import Xeepy

async def analyze_thread(url: str):
    async with Xeepy() as x:
        thread = await x.scrape.thread(url)

        print(f"Thread Analytics for @{thread.author.username}")
        print("=" * 50)
        print(f"Total tweets: {thread.total_tweets}")
        print(f"Total likes: {thread.total_likes:,}")
        print(f"Total retweets: {thread.total_retweets:,}")
        print(f"Total replies: {thread.total_replies:,}")

        avg_likes = thread.total_likes / thread.total_tweets
        print(f"Avg likes per tweet: {avg_likes:.1f}")

        # Find best performing tweet in thread
        best = max(thread.tweets, key=lambda t: t.like_count)
        print(f"\nBest performing tweet ({best.like_count} likes):")
        print(f"  {best.text[:100]}...")

asyncio.run(analyze_thread("https://x.com/user/status/123"))

Batch Thread Export¶

from xeepy import Xeepy
import json

async def export_threads(urls: list):
    async with Xeepy() as x:
        threads_data = []

        for url in urls:
            try:
                thread = await x.scrape.thread(url)
                threads_data.append({
                    "author": thread.author.username,
                    "tweets": [t.text for t in thread.tweets],
                    "total_likes": thread.total_likes,
                    "url": url
                })
            except Exception as e:
                print(f"Failed to scrape {url}: {e}")

        with open("threads.json", "w") as f:
            json.dump(threads_data, f, indent=2)

urls = [
    "https://x.com/user1/status/123",
    "https://x.com/user2/status/456"
]
asyncio.run(export_threads(urls))