Hashtag Tweet Scraping¶
Collect and analyze tweets containing specific hashtags to track trends, measure campaign performance, and understand community conversations.
Overview¶
Hashtag scraping retrieves tweets using specific hashtags, providing insights into trending topics, campaign reach, and community engagement. This is essential for marketers, researchers, and community managers.
Use Cases¶
- Campaign Tracking: Measure hashtag campaign performance
- Trend Analysis: Monitor trending hashtags in your industry
- Community Research: Study conversations around specific topics
- Event Monitoring: Track event hashtags in real-time
- Content Discovery: Find shareable content in your niche
Basic Usage¶
import asyncio
from xeepy import Xeepy
async def scrape_hashtag():
    """Fetch tweets tagged #Python, print a short summary of each, and export them to CSV."""
    async with Xeepy() as x:
        # `limit` caps how many tweets are retrieved.
        tweets = await x.scrape.hashtag("#Python", limit=100)

        for tweet in tweets:
            handle = tweet.author.username
            preview = tweet.text[:60]  # first 60 characters only
            print(f"@{handle}: {preview}...")
            print(f" Likes: {tweet.likes} | RTs: {tweet.retweets}\n")

        # Write the full result set to disk once printing is done.
        x.export.to_csv(tweets, "python_hashtag.csv")


asyncio.run(scrape_hashtag())
Advanced Hashtag Scraping¶
async def advanced_hashtag_scrape():
    """Run a filtered #MachineLearning search and print the combined engagement totals."""
    async with Xeepy() as x:
        tweets = await x.scrape.hashtag(
            hashtag="#MachineLearning",
            limit=500,
            sort_by="Latest",  # one of: Latest, Top, People, Media
            lang="en",  # language code filter
            since="2024-01-01",  # start of the date range (YYYY-MM-DD)
            until="2024-12-31",  # end of the date range (YYYY-MM-DD)
            min_likes=10,  # quality filter: minimum likes
            include_retweets=False,  # original tweets only
            verified_only=False,  # all users
        )
        print(f"Found {len(tweets)} tweets with #MachineLearning")

        # Accumulate both engagement totals in a single pass.
        total_likes = 0
        total_rts = 0
        for tweet in tweets:
            total_likes += tweet.likes
            total_rts += tweet.retweets
        print(f"Total engagement: {total_likes} likes, {total_rts} RTs")


asyncio.run(advanced_hashtag_scrape())
Multiple Hashtag Analysis¶
async def multi_hashtag_analysis():
    """Scrape several hashtags in turn and print a side-by-side engagement comparison."""
    async with Xeepy() as x:
        hashtags = ["#Python", "#JavaScript", "#Rust", "#Go"]
        results = {}

        for tag in hashtags:
            tweets = await x.scrape.hashtag(tag, limit=200)

            authors = {t.author.username for t in tweets}
            if tweets:
                engagement = sum(t.likes + t.retweets for t in tweets)
                avg_engagement = engagement / len(tweets)
            else:
                # No tweets: avoid dividing by zero.
                avg_engagement = 0

            results[tag] = {
                "count": len(tweets),
                "total_likes": sum(t.likes for t in tweets),
                "total_retweets": sum(t.retweets for t in tweets),
                "unique_authors": len(authors),
                "avg_engagement": avg_engagement,
            }

        # Report the per-hashtag metrics in the order the hashtags were scraped.
        print("Hashtag Performance Comparison:")
        print("-" * 60)
        for tag, metrics in results.items():
            print(f"{tag}:")
            print(f" Tweets: {metrics['count']}")
            print(f" Avg engagement: {metrics['avg_engagement']:.1f}")
            print(f" Unique authors: {metrics['unique_authors']}\n")


asyncio.run(multi_hashtag_analysis())
Campaign Hashtag Tracking¶
async def track_campaign_hashtag():
    """Summarize a campaign hashtag: volume, top contributors, and estimated reach.

    Prints the total number of mentions, the split between original tweets
    and retweets, the ten most active authors of original tweets, and an
    estimated reach figure.
    """
    from collections import Counter

    async with Xeepy() as x:
        campaign_tag = "#YourCampaign2024"
        tweets = await x.scrape.hashtag(
            campaign_tag,
            limit=1000,
            include_retweets=True,
        )

        # Campaign metrics
        original_tweets = [t for t in tweets if not t.is_retweet]
        retweets = [t for t in tweets if t.is_retweet]
        print(f"Campaign: {campaign_tag}")
        print(f"Total mentions: {len(tweets)}")
        print(f"Original tweets: {len(original_tweets)}")
        print(f"Retweets: {len(retweets)}")

        # Top contributors
        contributors = Counter(t.author.username for t in original_tweets)
        print("\nTop contributors:")
        for user, count in contributors.most_common(10):
            print(f" @{user}: {count} tweets")

        # Reach estimation: count each author's followers once. Summing per
        # tweet would count a prolific author's audience once for every tweet
        # they posted and inflate the estimate.
        followers_by_author = {
            t.author.username: t.author.followers_count for t in original_tweets
        }
        total_followers = sum(followers_by_author.values())
        print(f"\nEstimated reach: {total_followers:,} followers")


asyncio.run(track_campaign_hashtag())
Configuration Options¶
| Parameter | Type | Default | Description |
|---|---|---|---|
| `hashtag` | `str` | required | Hashtag to search (with or without `#`) |
| `limit` | `int` | `100` | Maximum tweets to retrieve |
| `sort_by` | `str` | `"Top"` | One of `Latest`, `Top`, `People`, `Media` |
| `lang` | `str` | `None` | Language code filter |
| `since` | `str` | `None` | Start date (`YYYY-MM-DD`) |
| `until` | `str` | `None` | End date (`YYYY-MM-DD`) |
| `min_likes` | `int` | `0` | Minimum likes filter |
| `include_retweets` | `bool` | `True` | Include retweets |
**Hashtag Formatting**
You can pass the hashtag with or without the `#` symbol. Both `"#Python"` and `"Python"` work correctly.
**Popular Hashtags**
Very popular hashtags may return thousands of results. Use `limit` and the date filters (`since`, `until`) to manage data volume.
Real-Time Hashtag Monitoring¶
async def monitor_hashtag():
    """Poll #BreakingNews every 30 seconds and print tweets not seen in earlier polls.

    Loops forever; stop it by cancelling the task or interrupting the program.
    """
    async with Xeepy() as x:
        hashtag = "#BreakingNews"
        seen_ids = set()  # ids of every tweet already reported
        print(f"Monitoring {hashtag}...")

        while True:
            tweets = await x.scrape.hashtag(
                hashtag,
                limit=50,
                sort_by="Latest",
            )

            # Decide what is new before recording anything from this batch.
            unseen = [t for t in tweets if t.id not in seen_ids]
            for tweet in unseen:
                seen_ids.add(tweet.id)
                print(f"\n[NEW] @{tweet.author.username}")
                print(f" {tweet.text[:100]}...")
                print(f" Likes: {tweet.likes} | RTs: {tweet.retweets}")

            await asyncio.sleep(30)  # wait 30 seconds between polls


# Left commented out because the monitor never returns on its own:
# asyncio.run(monitor_hashtag())
Hashtag Trend Analysis¶
async def analyze_hashtag_trends():
    """Group #AI tweets by calendar day and print daily tweet and engagement counts."""
    from collections import defaultdict

    async with Xeepy() as x:
        tweets = await x.scrape.hashtag("#AI", limit=500)

        # day (YYYY-MM-DD) -> [tweet count, likes + retweets]
        per_day = defaultdict(lambda: [0, 0])
        for tweet in tweets:
            day = tweet.created_at.strftime("%Y-%m-%d")
            stats = per_day[day]
            stats[0] += 1
            stats[1] += tweet.likes + tweet.retweets

        print("Daily hashtag activity:")
        # ISO-formatted dates sort chronologically as plain strings.
        for day in sorted(per_day):
            count, engagement = per_day[day]
            print(f" {day}: {count} tweets, {engagement} engagement")


asyncio.run(analyze_hashtag_trends())
Best Practices¶
- Start Recent: Use `sort_by="Latest"` for real-time monitoring
- Filter Quality: Set `min_likes` to focus on engaging content
- Track Variations: Monitor hashtag variations (e.g., #AI, #ArtificialIntelligence)
- Exclude Retweets: Use `include_retweets=False` for unique content analysis
- Set Date Ranges: Bound searches for campaign-specific analysis
- Monitor Competitors: Track competitor campaign hashtags