Files
ProfileMatching/find_twitter_candidates.py
Andrew Jiang 5319d4d868 Initial commit: Twitter-Telegram Profile Matching System
This module provides comprehensive Twitter-to-Telegram profile matching
and verification using 10 different matching methods and LLM verification.

Features:
- 10 matching methods (phash, usernames, bio handles, URL resolution, fuzzy names)
- URL resolution integration for t.co → t.me links
- Async LLM verification with GPT-5-mini
- Interactive menu system with real-time stats
- Threaded candidate finding (~1.5 contacts/sec)
- Comprehensive documentation and guides

Key Components:
- find_twitter_candidates.py: Core matching logic (10 methods)
- find_twitter_candidates_threaded.py: Threaded implementation
- verify_twitter_matches_v2.py: LLM verification (V5 prompt)
- review_match_quality.py: Analysis and quality review
- main.py: Interactive menu system
- Complete documentation (README, CHANGELOG, QUICKSTART)

Performance:
- Candidate finding: ~16-18 hours for 43K contacts
- LLM verification: ~23 hours for 43K users
- Cost: ~$130 for full verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-04 22:56:25 -08:00

852 lines
33 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Twitter-Telegram Candidate Finder
Finds potential Twitter matches for Telegram contacts using:
1. Handle extraction from bios
2. Username variation generation
3. Fuzzy name matching
"""
import sys
import re
import json
from pathlib import Path
from typing import List, Dict, Set, Tuple
import psycopg2
from psycopg2.extras import DictCursor, execute_values
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
from db_config import SessionLocal
from models import Contact
# Twitter database connection (adjust as needed)
TWITTER_DB_CONFIG = {
'dbname': 'twitter_data',
'user': 'andrewjiang', # Adjust to your setup
'host': 'localhost',
'port': 5432
}
class HandleExtractor:
"""Extract Twitter handles from text"""
@staticmethod
def extract_handles(text: str) -> List[str]:
"""Extract Twitter handles from bio/text"""
if not text:
return []
handles = set()
# Pattern 1: @username
pattern1 = r'@([a-zA-Z0-9_]{4,15})'
handles.update(re.findall(pattern1, text))
# Pattern 2: twitter.com/username or x.com/username
pattern2 = r'(?:twitter\.com|x\.com)/([a-zA-Z0-9_]{4,15})'
handles.update(re.findall(pattern2, text, re.IGNORECASE))
# Pattern 3: Clean standalone handles (risky, be conservative)
# Only if text is short and looks like a handle
if len(text) < 30 and text.count('@') == 1:
clean = text.strip('@').strip()
if re.match(r'^[a-zA-Z0-9_]{4,15}$', clean):
handles.add(clean)
return [h.lower() for h in handles if len(h) >= 4]
class UsernameVariationGenerator:
"""Generate Twitter handle variations from Telegram usernames"""
@staticmethod
def generate_variations(telegram_username: str) -> List[str]:
"""
Generate possible Twitter handle variations
Examples:
- alice_0x → [alice_0x, 0xalice, 0x_alice, alice0x]
- trader_69 → [trader_69, 69trader, 69_trader, trader69]
"""
if not telegram_username:
return []
variations = [telegram_username.lower()]
# Remove underscores
no_underscore = telegram_username.replace('_', '')
if no_underscore != telegram_username and len(no_underscore) >= 4:
variations.append(no_underscore.lower())
# Handle "0x" patterns (common in crypto)
if '0x' in telegram_username.lower():
# If 0x at end, try moving to front
if telegram_username.lower().endswith('_0x'):
base = telegram_username[:-3]
variations.extend([
f"0x{base}".lower(),
f"0x_{base}".lower()
])
elif telegram_username.lower().endswith('0x'):
base = telegram_username[:-2]
variations.extend([
f"0x{base}".lower(),
f"0x_{base}".lower()
])
# Try without underscore
no_under = telegram_username.replace('_', '')
if '0x' in no_under.lower() and no_under.lower() not in variations:
variations.append(no_under.lower())
# Handle trailing numbers (alice_69 → 69alice)
match = re.match(r'^([a-z_]+?)_?(\d+)$', telegram_username, re.IGNORECASE)
if match:
prefix, number = match.groups()
prefix = prefix.rstrip('_')
if len(f"{number}{prefix}") >= 4:
variations.extend([
f"{number}{prefix}".lower(),
f"{number}_{prefix}".lower()
])
# Handle leading numbers (69_alice → alice69)
match = re.match(r'^(\d+)_?([a-z_]+)$', telegram_username, re.IGNORECASE)
if match:
number, suffix = match.groups()
suffix = suffix.lstrip('_')
if len(f"{suffix}{number}") >= 4:
variations.extend([
f"{suffix}{number}".lower(),
f"{suffix}_{number}".lower()
])
# Single character removals (banteg → bantg, trader → trade)
# This catches shortened versions of usernames
base = telegram_username.lower()
if len(base) > 4: # Only if result would be at least 4 chars
for i in range(len(base)):
variation = base[:i] + base[i+1:]
if len(variation) >= 4:
variations.append(variation)
# Deduplicate and validate (4-15 chars for Twitter)
valid = []
for v in set(variations):
v_clean = v.strip('_')
if 4 <= len(v_clean) <= 15:
valid.append(v_clean)
return list(set(valid))
class TwitterMatcher:
"""Find Twitter profiles matching Telegram contacts"""
def __init__(self, twitter_conn, telegram_conn=None):
self.twitter_conn = twitter_conn
self.telegram_conn = telegram_conn # Needed for phash lookups
self.handle_extractor = HandleExtractor()
self.variation_generator = UsernameVariationGenerator()
def find_by_handle(self, handle: str) -> Dict:
"""Lookup Twitter profile by exact handle"""
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE LOWER(username) = %s
LIMIT 1
""", (handle.lower(),))
result = cur.fetchone()
return dict(result) if result else None
def find_by_fuzzy_name(self, telegram_name: str, limit=3) -> List[Dict]:
"""Find Twitter profiles with similar names using fuzzy matching"""
if not telegram_name or len(telegram_name) < 3:
return []
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
# Use parameterized query with proper escaping for % operator
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at,
similarity(name, %(name)s) AS name_score
FROM public.users
WHERE name %% %(name)s -- %% for similarity operator (escaped in string)
AND similarity(name, %(name)s) > 0.65 -- Increased threshold from 0.5 to reduce noise
ORDER BY name_score DESC
LIMIT %(limit)s
""", {'name': telegram_name, 'limit': limit})
return [dict(row) for row in cur.fetchall()]
def find_by_display_name_containment(self, telegram_name: str, limit=5) -> List[Dict]:
"""Find Twitter profiles where TG display name is contained in TW display name"""
if not telegram_name or len(telegram_name) < 3:
return []
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
# Direct containment search (case-insensitive)
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE name ILIKE %s -- TG name contained in TW name
AND LENGTH(name) >= %s -- TW name must be at least as long as TG name
ORDER BY followers_count DESC -- Prioritize by follower count
LIMIT %s
""", (f'%{telegram_name}%', len(telegram_name), limit))
return [dict(row) for row in cur.fetchall()]
def find_by_phash_match(self, telegram_user_id: int) -> List[Dict]:
"""Find Twitter profiles with matching profile picture phash (distance 0-1 only)"""
if not self.telegram_conn:
return []
with self.telegram_conn.cursor(cursor_factory=DictCursor) as cur:
# Query pre-computed phash matches (distance 0-1 for high confidence)
cur.execute("""
SELECT
m.twitter_user_id,
m.twitter_username,
m.hamming_distance,
m.telegram_phash,
m.twitter_phash
FROM telegram_twitter_phash_matches m
WHERE m.telegram_user_id = %s
AND m.hamming_distance <= 1 -- Only exact and distance-1 matches
ORDER BY m.hamming_distance ASC
LIMIT 5
""", (telegram_user_id,))
phash_matches = cur.fetchall()
# Commit to close transaction
self.telegram_conn.commit()
if not phash_matches:
return []
# Fetch full Twitter profile data for matched users
twitter_ids = [m['twitter_user_id'] for m in phash_matches]
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE id = ANY(%s)
""", (twitter_ids,))
twitter_profiles = [dict(row) for row in cur.fetchall()]
# Enrich with phash match details
twitter_profile_map = {str(p['id']): p for p in twitter_profiles}
results = []
for match in phash_matches:
tw_id = match['twitter_user_id']
if tw_id in twitter_profile_map:
profile = twitter_profile_map[tw_id].copy()
profile['phash_distance'] = match['hamming_distance']
profile['telegram_phash'] = match['telegram_phash']
profile['twitter_phash'] = match['twitter_phash']
results.append(profile)
return results
def find_by_telegram_in_twitter_bio(self, telegram_username: str, limit=3) -> List[Dict]:
"""Find Twitter profiles that mention this Telegram username in their bio (exact @mention only)"""
if not telegram_username or len(telegram_username) < 4:
return []
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
# Search for various Telegram handle patterns in Twitter bios
# Use regex with word boundaries to avoid substring matches in other handles
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE description IS NOT NULL
AND (
description ~* %s -- @username with word boundary (not part of longer handle)
OR LOWER(description) LIKE %s -- t.me/username
OR LOWER(description) LIKE %s -- telegram.me/username
)
ORDER BY followers_count DESC
LIMIT %s
""", (
r'@' + telegram_username + r'(\s|$|[^a-zA-Z0-9_])', # Word boundary: space, end, or non-alphanumeric
f'%t.me/{telegram_username.lower()}%',
f'%telegram.me/{telegram_username.lower()}%',
limit
))
return [dict(row) for row in cur.fetchall()]
def find_by_resolved_url(self, telegram_username: str) -> List[Dict]:
"""Find Twitter profiles whose bio URL resolves to this Telegram username"""
if not telegram_username or len(telegram_username) < 4:
return []
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
# Query url_resolution_queue for Twitter profiles with resolved Telegram links
cur.execute("""
SELECT DISTINCT
u.id,
u.username,
u.name,
u.description,
u.location,
u.verified,
u.is_blue_verified,
u.followers_count,
u.following_count,
u.created_at,
urq.resolved_url,
urq.original_url
FROM url_resolution_queue urq
JOIN public.users u ON u.id::text = urq.twitter_id
WHERE urq.telegram_handles IS NOT NULL
AND urq.telegram_handles @> %s::jsonb
ORDER BY u.followers_count DESC
LIMIT 3
""", (f'["{telegram_username.lower()}"]',))
results = cur.fetchall()
# Enrich with URL details
enriched = []
for row in results:
profile = dict(row)
profile['resolved_telegram_url'] = profile.pop('resolved_url')
profile['original_url'] = profile.pop('original_url')
enriched.append(profile)
return enriched
def find_by_username_in_display_name(self, search_term: str, is_telegram: bool, limit: int = 5) -> List[Dict]:
"""
Find Twitter profiles where display name contains username pattern
If is_telegram=True: Search for TG username in Twitter display names
If is_telegram=False: Search for Twitter username pattern in TG display names (reverse: find Twitter profiles whose username matches the TG display name)
"""
with self.twitter_conn.cursor(cursor_factory=DictCursor) as cur:
if is_telegram:
# TG username in Twitter display name
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE name IS NOT NULL
AND LOWER(name) LIKE %s
ORDER BY followers_count DESC
LIMIT %s
""", (f'%{search_term.lower()}%', limit))
else:
# Twitter username in TG display name - search for Twitter profiles whose username appears in the search term
cur.execute("""
SELECT
id,
username,
name,
description,
location,
verified,
is_blue_verified,
followers_count,
following_count,
created_at
FROM public.users
WHERE username IS NOT NULL
AND LOWER(%s) LIKE '%%' || LOWER(username) || '%%'
ORDER BY followers_count DESC
LIMIT %s
""", (search_term, limit))
return [dict(row) for row in cur.fetchall()]
def get_display_name(self, contact: Contact) -> str:
"""Get display name with fallback to first_name + last_name"""
if contact.display_name:
return contact.display_name
# Fallback to first_name + last_name
parts = [contact.first_name, contact.last_name]
return ' '.join(p for p in parts if p).strip()
def find_candidates_for_contact(self, contact: Contact) -> List[Dict]:
"""
Find all Twitter candidates for a single Telegram contact
Returns list of candidates with:
{
'twitter_profile': {...},
'match_method': 'exact_bio_handle' | 'exact_username' | 'username_variation' | 'fuzzy_name',
'baseline_confidence': 0.0-1.0,
'match_details': {...}
}
"""
candidates = []
seen_twitter_ids = set()
# Get display name with fallback
display_name = self.get_display_name(contact)
# Method 1: Phash matching (profile picture similarity)
phash_matches = self.find_by_phash_match(contact.user_id)
for twitter_profile in phash_matches:
phash_distance = twitter_profile.pop('phash_distance')
telegram_phash = twitter_profile.pop('telegram_phash')
twitter_phash = twitter_profile.pop('twitter_phash')
# Baseline confidence based on phash distance
if phash_distance == 0:
baseline_confidence = 0.95 # Exact match - VERY strong signal
elif phash_distance == 1:
baseline_confidence = 0.88 # 1-bit difference - strong signal
else:
continue # Skip distance > 1
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'phash_match',
'baseline_confidence': baseline_confidence,
'match_details': {
'phash_distance': phash_distance,
'telegram_phash': telegram_phash,
'twitter_phash': twitter_phash
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 2: Extract handles from bio
if contact.bio:
bio_handles = self.handle_extractor.extract_handles(contact.bio)
for handle in bio_handles:
twitter_profile = self.find_by_handle(handle)
if twitter_profile and twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'exact_bio_handle',
'baseline_confidence': 0.95,
'match_details': {
'extracted_handle': handle,
'from_bio': True
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 3: Exact username match
if contact.username:
twitter_profile = self.find_by_handle(contact.username)
if twitter_profile and twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'exact_username',
'baseline_confidence': 0.90,
'match_details': {
'telegram_username': contact.username,
'twitter_username': twitter_profile['username']
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 4: Username variations
if contact.username:
variations = self.variation_generator.generate_variations(contact.username)
for variation in variations:
if variation == contact.username.lower():
continue # Already checked in Method 2
twitter_profile = self.find_by_handle(variation)
if twitter_profile and twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'username_variation',
'baseline_confidence': 0.80,
'match_details': {
'telegram_username': contact.username,
'username_variation': variation,
'twitter_username': twitter_profile['username']
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 5: Twitter bio contains Telegram username (reverse lookup)
if contact.username:
reverse_matches = self.find_by_telegram_in_twitter_bio(contact.username, limit=3)
for twitter_profile in reverse_matches:
if twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'twitter_bio_has_telegram',
'baseline_confidence': 0.92, # Very high confidence - they explicitly mention their Telegram
'match_details': {
'telegram_username': contact.username,
'twitter_username': twitter_profile['username'],
'found_in_twitter_bio': True
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 5b: Twitter bio URL resolves to Telegram username (via url_resolution_queue)
if contact.username:
url_resolved_matches = self.find_by_resolved_url(contact.username)
for twitter_profile in url_resolved_matches:
if twitter_profile['id'] not in seen_twitter_ids:
resolved_url = twitter_profile.pop('resolved_telegram_url', None)
original_url = twitter_profile.pop('original_url', None)
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'twitter_bio_url_resolves_to_telegram',
'baseline_confidence': 0.95, # VERY high confidence - explicit URL link in bio
'match_details': {
'telegram_username': contact.username,
'twitter_username': twitter_profile['username'],
'resolved_url': resolved_url,
'original_url': original_url,
'found_via_url_resolution': True
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 6: Display name containment (TG name in TW name)
if display_name:
containment_matches = self.find_by_display_name_containment(display_name, limit=5)
for twitter_profile in containment_matches:
if twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'display_name_containment',
'baseline_confidence': 0.92, # High confidence for exact name containment
'match_details': {
'telegram_name': display_name,
'twitter_name': twitter_profile['name'],
'match_type': 'tg_name_contained_in_tw_name'
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 7: Fuzzy name match (always run to find additional candidates)
if display_name:
fuzzy_matches = self.find_by_fuzzy_name(display_name, limit=5)
for i, twitter_profile in enumerate(fuzzy_matches):
name_score = twitter_profile.pop('name_score')
# Calculate baseline confidence from name similarity
baseline_confidence = min(0.85, name_score) # Cap at 0.85 for fuzzy matches
if twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'fuzzy_name',
'baseline_confidence': baseline_confidence,
'match_details': {
'telegram_name': display_name,
'twitter_name': twitter_profile['name'],
'fuzzy_score': name_score,
'candidate_rank': i + 1
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 8: TG username in Twitter display name
if contact.username:
matches = self.find_by_username_in_display_name(contact.username, is_telegram=True, limit=3)
for twitter_profile in matches:
if twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'tg_username_in_twitter_name',
'baseline_confidence': 0.88,
'match_details': {
'telegram_username': contact.username,
'twitter_name': twitter_profile['name'],
'found_in_display_name': True
}
})
seen_twitter_ids.add(twitter_profile['id'])
# Method 9: Twitter username in TG display name
if display_name:
matches = self.find_by_username_in_display_name(display_name, is_telegram=False, limit=3)
for twitter_profile in matches:
if twitter_profile['id'] not in seen_twitter_ids:
candidates.append({
'twitter_profile': twitter_profile,
'match_method': 'twitter_username_in_tg_name',
'baseline_confidence': 0.86,
'match_details': {
'telegram_name': display_name,
'twitter_username': twitter_profile['username'],
'username_in_display_name': True
}
})
seen_twitter_ids.add(twitter_profile['id'])
return candidates
def save_candidates_to_db(telegram_user_id: int, account_id: int, candidates: List[Dict], telegram_db):
"""Save candidates to twitter_match_candidates table"""
if not candidates:
return
insert_data = []
for cand in candidates:
tw = cand['twitter_profile']
insert_data.append((
account_id, # Add account_id
telegram_user_id,
tw['id'],
tw['username'],
tw['name'],
tw.get('description', ''),
tw.get('location'),
tw.get('verified', False),
tw.get('is_blue_verified', False),
tw.get('followers_count', 0),
cand.get('match_details', {}).get('candidate_rank', 1),
cand['match_method'],
cand['baseline_confidence'],
json.dumps(cand['match_details']) # Convert dict to JSON string
))
execute_values(telegram_db, """
INSERT INTO twitter_match_candidates (
account_id,
telegram_user_id,
twitter_id,
twitter_username,
twitter_name,
twitter_bio,
twitter_location,
twitter_verified,
twitter_blue_verified,
twitter_followers_count,
candidate_rank,
match_method,
baseline_confidence,
match_signals
) VALUES %s
ON CONFLICT DO NOTHING
""", insert_data, template="""(
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)""")
def main():
print()
print("=" * 70)
print("🔍 Twitter-Telegram Candidate Finder")
print("=" * 70)
print()
# Check arguments
test_mode = '--test' in sys.argv
limit = 100 if test_mode else None
# Check for --limit parameter
if '--limit' in sys.argv:
idx = sys.argv.index('--limit')
limit = int(sys.argv[idx + 1])
print(f"📊 LIMIT MODE: Processing first {limit:,} contacts")
print()
elif test_mode:
print("🧪 TEST MODE: Processing first 100 contacts only")
print()
# Connect to databases
print("📡 Connecting to databases...")
# Connect SQLAlchemy to localhost (not RDS)
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
localhost_engine = create_engine('postgresql://andrewjiang@localhost:5432/telegram_contacts')
LocalSession = sessionmaker(bind=localhost_engine)
telegram_db = LocalSession()
try:
twitter_conn = psycopg2.connect(**TWITTER_DB_CONFIG)
twitter_conn.autocommit = False
except Exception as e:
print(f"❌ Failed to connect to Twitter database: {e}")
print(f" Config: {TWITTER_DB_CONFIG}")
return False
# Also need psycopg2 connection to telegram DB for writing
try:
telegram_conn = psycopg2.connect(dbname='telegram_contacts', user='andrewjiang', host='localhost', port=5432)
telegram_conn.autocommit = False
except Exception as e:
print(f"❌ Failed to connect to Telegram database: {e}")
return False
print("✅ Connected to both databases")
print()
try:
# Initialize matcher (pass both connections for phash lookup)
telegram_psycopg_conn = psycopg2.connect(
dbname='telegram_contacts',
user='andrewjiang',
host='localhost'
)
matcher = TwitterMatcher(twitter_conn, telegram_psycopg_conn)
# Get Telegram contacts with bios or usernames
print("🔍 Loading Telegram contacts...")
query = telegram_db.query(Contact).filter(
Contact.user_id > 0, # Exclude channels
Contact.is_deleted == False,
Contact.is_bot == False
).filter(
(Contact.bio != None) | (Contact.username != None)
).order_by(Contact.user_id)
if limit:
query = query.limit(limit)
contacts = query.all()
print(f"✅ Found {len(contacts):,} contacts to process")
print()
# Process each contact
stats = {
'processed': 0,
'with_candidates': 0,
'total_candidates': 0,
'by_method': {}
}
print("🚀 Finding Twitter candidates...")
print()
for i, contact in enumerate(contacts, 1):
candidates = matcher.find_candidates_for_contact(contact)
if candidates:
stats['with_candidates'] += 1
stats['total_candidates'] += len(candidates)
# Track by method
for cand in candidates:
method = cand['match_method']
stats['by_method'][method] = stats['by_method'].get(method, 0) + 1
# Save to database
with telegram_conn.cursor() as cur:
save_candidates_to_db(contact.user_id, contact.account_id, candidates, cur)
telegram_conn.commit()
stats['processed'] += 1
# Progress update every 10 contacts
if i % 10 == 0:
print(f" Processed {i:,}/{len(contacts):,} contacts... (candidates: {stats['with_candidates']:,}, total: {stats['total_candidates']:,})")
elif i == 1:
# Show first contact immediately
print(f" Processed 1/{len(contacts):,} contacts... (candidates: {stats['with_candidates']:,}, total: {stats['total_candidates']:,})")
# Final stats
print()
print("=" * 70)
print("✅ CANDIDATE FINDING COMPLETE")
print("=" * 70)
print()
print(f"📊 Statistics:")
print(f" Processed: {stats['processed']:,} contacts")
print(f" With candidates: {stats['with_candidates']:,} ({stats['with_candidates']/stats['processed']*100:.1f}%)")
print(f" Total candidates: {stats['total_candidates']:,}")
print(f" Avg candidates per match: {stats['total_candidates']/max(stats['with_candidates'], 1):.1f}")
print()
print(f"📈 By method:")
for method, count in sorted(stats['by_method'].items(), key=lambda x: -x[1]):
print(f" {method}: {count:,}")
print()
return True
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
return False
finally:
telegram_db.close()
twitter_conn.close()
telegram_conn.close()
if __name__ == "__main__":
try:
success = main()
sys.exit(0 if success else 1)
except KeyboardInterrupt:
print("\n\n⚠️ Interrupted by user")
sys.exit(1)