# Bot and Spam Detection
Identify automated accounts, spam bots, and inauthentic behavior using AI-powered analysis of user profiles and activity patterns.
## Overview
Bot detection analyzes user profiles, posting patterns, and content characteristics to identify automated or suspicious accounts. This helps maintain community quality, filter engagement data, and protect against spam attacks.
## Use Cases
- Follower Quality Audit: Identify bot followers inflating counts
- Engagement Authenticity: Filter fake engagement from analytics
- Community Protection: Block spam accounts proactively
- Influencer Vetting: Verify influencer audience authenticity
- Research Data Quality: Clean datasets of bot-generated content
## Basic Usage
import asyncio
from xeepy import Xeepy
from xeepy.ai import ContentGenerator
async def detect_bot():
    """Scrape a single profile and print the AI bot-detection verdict for it.

    Prints the bot probability, the classification label, and every risk
    factor reported by the detector.
    """
    async with Xeepy() as x:
        detector = ContentGenerator(provider="openai", api_key="your-api-key", model="gpt-4")

        # Fetch the account under inspection, then score it for bot indicators.
        account = await x.scrape.profile("suspicious_account")
        verdict = await detector.detect_bot(account)

        print(f"Bot probability: {verdict.bot_score:.1%}")
        print(f"Classification: {verdict.classification}")  # bot, human, suspicious
        print("\nRisk factors:")
        for reason in verdict.risk_factors:
            print(f" - {reason}")


asyncio.run(detect_bot())
## Detailed Bot Analysis
async def detailed_bot_analysis():
    """Run bot detection on a profile plus its recent tweets and print the breakdown.

    Prints the overall bot score followed by the four per-area indicator
    scores exposed in ``result.indicators``.
    """
    handle = "account_to_check"
    # (display label, key in result.indicators), in print order.
    indicator_rows = (
        ("Profile completeness", "profile_score"),
        ("Activity patterns", "activity_score"),
        ("Content originality", "content_score"),
        ("Network authenticity", "network_score"),
    )

    async with Xeepy() as x:
        ai = ContentGenerator(provider="openai", api_key="...", model="gpt-4")

        # Gather the profile and its 50 most recent tweets for the analysis.
        profile = await x.scrape.profile(handle)
        tweets = await x.scrape.tweets(handle, limit=50)

        # analyze_patterns=True asks the detector to check posting patterns too.
        result = await ai.detect_bot(profile=profile, tweets=tweets, analyze_patterns=True)

        print(f"Account: @{profile.username}")
        print(f"Bot Score: {result.bot_score:.1%}")
        print("\nIndicators:")
        for label, key in indicator_rows:
            print(f" {label}: {result.indicators[key]:.1%}")


asyncio.run(detailed_bot_analysis())
## Batch Follower Audit
async def audit_followers():
    """Bucket an account's followers by bot score and export the likely bots.

    Scrapes up to 500 followers, scores each one, and sorts them into three
    buckets: bot score above 0.8 (likely bots), above 0.5 (suspicious), and
    everything else (likely humans). Prints the size and share of each bucket
    and writes the likely bots to ``potential_bots.csv``.

    If no followers are returned, prints a notice and exits without exporting.
    """
    async with Xeepy() as x:
        ai = ContentGenerator(provider="openai", api_key="...", model="gpt-4")

        followers = await x.scrape.followers("your_account", limit=500)
        total = len(followers)
        if total == 0:
            # The percentages below divide by the follower count, so an empty
            # result would otherwise raise ZeroDivisionError.
            print("No followers returned; nothing to audit.")
            return

        bots = []
        suspicious = []
        humans = []
        for follower in followers:
            result = await ai.detect_bot(follower)
            if result.bot_score > 0.8:
                bots.append(follower)
            elif result.bot_score > 0.5:
                suspicious.append(follower)
            else:
                humans.append(follower)

        print("Follower Audit Results:")
        print(f" Likely bots: {len(bots)} ({len(bots)/total*100:.1f}%)")
        print(f" Suspicious: {len(suspicious)} ({len(suspicious)/total*100:.1f}%)")
        print(f" Likely humans: {len(humans)} ({len(humans)/total*100:.1f}%)")

        # Export bot list.
        # NOTE(review): called without await, as in the other export example;
        # confirm to_csv is synchronous in the Xeepy export API.
        x.export.to_csv(bots, "potential_bots.csv")


asyncio.run(audit_followers())
## Heuristic Detection (No AI)
async def heuristic_bot_detection():
    """Score an account for bot likelihood using fixed rules only (no AI).

    Each rule that fires adds a fixed weight to the score and records a
    human-readable reason:

    * more than 50 tweets per day since account creation (+0.3)
    * following more than 10x as many accounts as follow it (+0.2)
    * default profile image (+0.2)
    * bio shorter than 10 characters (+0.1)
    * username ending in six or more digits (+0.2)

    Prints the score (capped at 100%) and the list of triggered factors.
    """
    # Local imports keep this snippet runnable on its own.
    import re
    from datetime import datetime

    async with Xeepy() as x:
        profile = await x.scrape.profile("account_to_check")

        score = 0.0
        factors = []

        # Check profile age vs tweet count.
        created_at = profile.created_at
        # Build "now" with the same tzinfo as created_at: subtracting a naive
        # datetime from an aware one (or vice versa) raises TypeError.
        now = datetime.now(tz=created_at.tzinfo)
        account_age_days = (now - created_at).days
        # max(..., 1) avoids dividing by zero for accounts created today.
        tweets_per_day = profile.tweet_count / max(account_age_days, 1)
        if tweets_per_day > 50:
            score += 0.3
            factors.append(f"High posting rate: {tweets_per_day:.1f} tweets/day")

        # Check follower/following ratio (skipped when there are no followers,
        # since the ratio is undefined).
        if profile.followers_count > 0:
            ratio = profile.following_count / profile.followers_count
            if ratio > 10:
                score += 0.2
                factors.append(f"Suspicious ratio: following {ratio:.1f}x more")

        # Check default profile image.
        if profile.default_profile_image:
            score += 0.2
            factors.append("Default profile image")

        # Check bio length; a missing bio is treated as an empty string.
        if len(profile.bio or "") < 10:
            score += 0.1
            factors.append("Minimal or no bio")

        # Check username pattern: six or more trailing digits.
        if re.search(r'\d{6,}$', profile.username):
            score += 0.2
            factors.append("Username ends with many numbers")

        print(f"Heuristic bot score: {min(score, 1.0):.1%}")
        print(f"Factors: {factors}")


asyncio.run(heuristic_bot_detection())
## Configuration Options
| Parameter | Type | Default | Description |
|---|---|---|---|
| `profile` | `User` | required | User profile object |
| `tweets` | `list` | `None` | Recent tweets for pattern analysis |
| `analyze_patterns` | `bool` | `True` | Check posting time patterns |
| `threshold` | `float` | `0.7` | Bot classification threshold |
| `include_network` | `bool` | `False` | Analyze follower network |
**Combine Methods:** For best results, combine AI analysis with heuristic rules: use fast heuristics for initial filtering, then run AI analysis only on the borderline cases.
**False Positives:** Automated accounts aren't always malicious. Brand accounts, news bots, and utility accounts may trigger bot detection, so consider the account's context before acting on a score.
## Posting Pattern Analysis
async def analyze_posting_patterns():
    """Print timing statistics for an account's 100 most recent tweets.

    Reports how concentrated posting is in the busiest hour of the day
    (busiest hour's tweet count divided by the average count per hour over a
    24-hour day), and warns when the gaps between consecutive tweets are
    nearly constant. No AI client is needed for this analysis.
    """
    from collections import Counter

    async with Xeepy() as x:
        tweets = await x.scrape.tweets("account_to_check", limit=100)

        # Tweets per hour-of-day. The ratio below is 1.0 for a perfectly even
        # spread and grows as posting clusters into a single hour.
        hours = Counter(t.created_at.hour for t in tweets)
        peak_concentration = max(hours.values()) / (sum(hours.values()) / 24) if hours else 0

        print("Posting time distribution:")
        print(f" Peak hour concentration: {peak_concentration:.2f}x average")

        # Check for precise intervals (needs at least two tweets).
        if len(tweets) > 1:
            # Seconds between each tweet and the next one in the list. Only
            # integer indexing is used, as that is all the scrape result is
            # known to support here.
            intervals = [
                (tweets[i - 1].created_at - tweets[i].created_at).total_seconds()
                for i in range(1, len(tweets))
            ]
            avg_interval = sum(intervals) / len(intervals)
            # Population variance of the gaps, in seconds squared.
            variance = sum((gap - avg_interval) ** 2 for gap in intervals) / len(intervals)
            if variance < 100:  # Very consistent timing
                print(" ⚠️ Suspiciously consistent posting intervals")


asyncio.run(analyze_posting_patterns())
## Content Similarity Detection
async def detect_content_spam():
    """Check an account's 50 most recent tweets for duplicated or templated content.

    First measures the share of distinct tweet texts and warns when fewer than
    half are unique, then prints the AI content-pattern analysis (template
    score, promotional ratio, and number of repeated phrases).

    If no tweets are returned, prints a notice and exits without analysis.
    """
    async with Xeepy() as x:
        ai = ContentGenerator(provider="openai", api_key="...", model="gpt-4")

        tweets = await x.scrape.tweets("account_to_check", limit=50)
        texts = [t.text for t in tweets]
        if not texts:
            # The uniqueness ratio divides by len(texts), so an empty result
            # would otherwise raise ZeroDivisionError.
            print("No tweets returned; nothing to analyze.")
            return

        # Simple duplicate check: distinct texts as a fraction of all texts.
        unique_ratio = len(set(texts)) / len(texts)
        if unique_ratio < 0.5:
            print(f"⚠️ Low content uniqueness: {unique_ratio:.1%}")

        # AI-based similarity analysis.
        result = await ai.analyze_content_patterns(texts)

        print("\nContent Analysis:")
        print(f" Template usage detected: {result.template_score:.1%}")
        print(f" Promotional content ratio: {result.promo_ratio:.1%}")
        print(f" Repetitive phrases: {len(result.repeated_phrases)}")


asyncio.run(detect_content_spam())
## Influencer Audience Audit
async def audit_influencer():
    """Estimate how many of an influencer's followers are real from a sample.

    Scores a sample of up to 200 followers, counts those with a bot score
    above 0.7 as likely fake, and scales the remaining share by the
    influencer's total follower count to estimate the real audience.

    If no followers are returned, prints a notice and exits.
    """
    async with Xeepy() as x:
        ai = ContentGenerator(provider="openai", api_key="...", model="gpt-4")
        influencer = "influencer_username"

        # Sample followers.
        followers = await x.scrape.followers(influencer, limit=200)
        if not followers:
            # The fake percentage divides by the sample size, so an empty
            # sample would otherwise raise ZeroDivisionError.
            print(f"Influencer Audit: @{influencer}")
            print(" No followers returned; nothing to audit.")
            return

        # Analyze sample.
        bot_count = 0
        for follower in followers:
            result = await ai.detect_bot(follower)
            if result.bot_score > 0.7:
                bot_count += 1

        fake_percentage = bot_count / len(followers) * 100

        # Await the profile first and only then read its attribute:
        # `await f(x).attr` applies `.attr` to the un-awaited coroutine.
        profile = await x.scrape.profile(influencer)
        estimated_real = int((100 - fake_percentage) / 100 * profile.followers_count)

        print(f"Influencer Audit: @{influencer}")
        print(f" Sampled followers: {len(followers)}")
        print(f" Likely fake: {bot_count} ({fake_percentage:.1f}%)")
        print(f" Estimated real followers: {estimated_real:,}")


asyncio.run(audit_influencer())
## Export Detection Results
async def export_bot_report():
    """Score up to 500 followers and export one CSV row per follower.

    Each row holds the follower's username and follower count together with
    the detector's bot score, classification, and comma-joined risk factors.
    The rows are written to ``bot_detection_report.csv``.
    """
    async with Xeepy() as x:
        ai = ContentGenerator(provider="openai", api_key="...", model="gpt-4")

        async def build_row(account):
            # Score one follower and flatten the verdict into a CSV-ready dict.
            verdict = await ai.detect_bot(account)
            return {
                "username": account.username,
                "followers": account.followers_count,
                "bot_score": verdict.bot_score,
                "classification": verdict.classification,
                "risk_factors": ", ".join(verdict.risk_factors),
            }

        followers = await x.scrape.followers("your_account", limit=500)

        # Followers are scored one at a time, in the order they were returned.
        report = [await build_row(account) for account in followers]

        x.export.to_csv(report, "bot_detection_report.csv")
        print(f"Exported analysis of {len(report)} accounts")


asyncio.run(export_bot_report())
## Best Practices
- Sample Appropriately: For large follower bases, use representative samples
- Set Thresholds Carefully: A bot-score threshold that is too low flags genuine accounts (false positives); one that is too high lets bots through
- Consider Context: News aggregators and brand accounts may appear bot-like
- Use Multiple Signals: Combine profile, content, and behavior analysis
- Regular Audits: Bot tactics evolve; audit periodically
- Document Decisions: Keep records of why accounts were flagged