#!/usr/bin/env python3
"""
Twitter-Telegram Profile Matching System
Main menu for finding candidates and verifying matches with LLM
"""

import os
import shutil
import subprocess
import sys
from contextlib import closing

import psycopg2

# Make this script's own directory and the sibling ../src directory importable.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'src'))

# Database configuration
DB_CONFIG = {
    'dbname': 'telegram_contacts',
    'user': 'andrewjiang',
    'host': 'localhost',
    'port': 5432
}

TWITTER_DB_CONFIG = {
    'dbname': 'twitter_data',
    'user': 'andrewjiang',
    'host': 'localhost',
    'port': 5432
}


def get_stats():
    """Get current matching statistics.

    Runs three aggregate queries against the database described by
    ``DB_CONFIG`` (tables ``twitter_match_candidates`` and
    ``twitter_telegram_matches``).

    Returns:
        dict with the keys ``total_users``, ``total_candidates``,
        ``processed_candidates``, ``pending_candidates``, ``total_matches``,
        ``avg_confidence`` (0 when there are no matches), ``high_conf``
        (>= 0.90), ``med_conf`` (0.80 to < 0.90), ``low_conf``
        (0.70 to < 0.80) and ``users_with_matches``. Matches below 0.70 are
        included in ``total_matches`` but in none of the three buckets.

    Raises:
        psycopg2.Error: if connecting or any query fails.
    """
    stats = {}

    # closing() guarantees the cursor and connection are released even when a
    # query raises. psycopg2's own ``with conn:`` only ends the transaction;
    # it does not close the connection.
    with closing(psycopg2.connect(**DB_CONFIG)) as conn, closing(conn.cursor()) as cur:
        # Candidates stats
        cur.execute("""
            SELECT
                COUNT(DISTINCT telegram_user_id) as total_users,
                COUNT(*) as total_candidates,
                COUNT(*) FILTER (WHERE llm_processed = TRUE) as processed_candidates,
                COUNT(*) FILTER (WHERE llm_processed = FALSE) as pending_candidates
            FROM twitter_match_candidates
        """)
        row = cur.fetchone()
        stats['total_users'] = row[0]
        stats['total_candidates'] = row[1]
        stats['processed_candidates'] = row[2]
        stats['pending_candidates'] = row[3]

        # Matches stats
        cur.execute("""
            SELECT
                COUNT(*) as total_matches,
                AVG(final_confidence) as avg_confidence,
                COUNT(*) FILTER (WHERE final_confidence >= 0.90) as high_conf,
                COUNT(*) FILTER (WHERE final_confidence >= 0.80 AND final_confidence < 0.90) as med_conf,
                COUNT(*) FILTER (WHERE final_confidence >= 0.70 AND final_confidence < 0.80) as low_conf
            FROM twitter_telegram_matches
        """)
        row = cur.fetchone()
        stats['total_matches'] = row[0]
        # AVG() yields NULL (None) on an empty table; report 0 instead.
        stats['avg_confidence'] = row[1] or 0
        stats['high_conf'] = row[2]
        stats['med_conf'] = row[3]
        stats['low_conf'] = row[4]

        # Users with matches
        cur.execute("""
            SELECT COUNT(DISTINCT telegram_user_id)
            FROM twitter_telegram_matches
        """)
        stats['users_with_matches'] = cur.fetchone()[0]

    return stats


def print_header():
    """Print main header"""
    print()
    print("=" * 80)
    print("🔗 Twitter-Telegram Profile Matching System")
    print("=" * 80)
    print()


def print_stats():
    """Print current statistics.

    Fetches fresh numbers via ``get_stats()`` on every call, so any database
    error raised there propagates to the caller.
    """
    stats = get_stats()

    print("📊 Current Statistics:")
    print("-" * 80)
    print("Candidates:")
    print(f" • Users with candidates: {stats['total_users']:,}")
    print(f" • Total candidates found: {stats['total_candidates']:,}")
    print(f" • Processed by LLM: {stats['processed_candidates']:,}")
    print(f" • Pending verification: {stats['pending_candidates']:,}")
    print()
    print("Verified Matches:")
    print(f" • Users with matches: {stats['users_with_matches']:,}")
    print(f" • Total matches: {stats['total_matches']:,}")
    print(f" • Average confidence: {stats['avg_confidence']:.2f}")
    print(f" • High confidence (90%+): {stats['high_conf']:,}")
    print(f" • Medium confidence (80-89%): {stats['med_conf']:,}")
    print(f" • Low confidence (70-79%): {stats['low_conf']:,}")
    print("-" * 80)
    print()


def run_script(script_name, *args):
    """Run a Python script with arguments.

    Args:
        script_name: Base name (without ``.py``) of a script located in the
            same directory as this file.
        *args: Command-line arguments (strings) passed to the script as-is.

    The child's exit status is not inspected; the call blocks until the
    script finishes.
    """
    script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{script_name}.py")
    # Prefer python3.10 as before; if it is not on PATH, fall back to the
    # interpreter running this menu instead of dying with FileNotFoundError.
    interpreter = shutil.which('python3.10') or sys.executable
    subprocess.run([interpreter, script_path, *args])


def _print_menu():
    """Print the numbered list of menu options."""
    print("📋 Main Menu:")
    print()
    print("STEP 1: Find Candidates")
    print(" 1. Find Twitter candidates (threaded, RECOMMENDED)")
    print(" 2. Find Twitter candidates (single-threaded)")
    print()
    print("STEP 2: Verify with LLM")
    print(" 3. Verify matches with LLM (async, RECOMMENDED)")
    print(" 4. Verify matches with LLM (test mode - 50 users)")
    print()
    print("Analysis & Review")
    print(" 5. Review match quality")
    print(" 6. Show statistics only")
    print()
    print(" 0. Exit")
    print()


def _pause(prompt="\n✅ Press Enter to continue..."):
    """Block until the user presses Enter so script output stays on screen."""
    input(prompt)


def main():
    """Run the interactive menu loop until the user chooses option 0.

    Each iteration prints the header and the current statistics, shows the
    menu, then dispatches the choice to the matching helper script through
    ``run_script``.
    """
    while True:
        print_header()
        print_stats()
        _print_menu()

        choice = input("👉 Enter your choice: ").strip()

        if choice == '0':
            print("\n👋 Goodbye!\n")
            break

        elif choice == '1':
            # Find candidates (threaded)
            print()
            print("🔍 Finding Twitter candidates (threaded mode)...")
            print()
            limit_input = input("👉 How many contacts? (press Enter for all): ").strip()
            workers = input("👉 Number of worker threads (default: 8): ").strip() or '8'
            # An empty limit means "all contacts": omit --limit entirely.
            limit_args = ['--limit', limit_input] if limit_input else []
            run_script('find_twitter_candidates_threaded', *limit_args, '--workers', workers)
            _pause()

        elif choice == '2':
            # Find candidates (single-threaded)
            print()
            print("🔍 Finding Twitter candidates (single-threaded mode)...")
            print()
            limit_input = input("👉 How many contacts? (press Enter for all): ").strip()
            limit_args = ['--limit', limit_input] if limit_input else []
            run_script('find_twitter_candidates', *limit_args)
            _pause()

        elif choice == '3':
            # Verify with LLM (async)
            print()
            print("🤖 Verifying matches with LLM (async mode)...")
            print()
            concurrent = input("👉 Concurrent requests (default: 100): ").strip() or '100'
            run_script('verify_twitter_matches_v2', '--verbose', '--concurrent', concurrent)
            _pause()

        elif choice == '4':
            # Verify with LLM (test mode)
            print()
            print("🧪 Test mode: Verifying 50 users with LLM...")
            print()
            run_script('verify_twitter_matches_v2', '--test', '--limit', '50',
                       '--verbose', '--concurrent', '10')
            _pause()

        elif choice == '5':
            # Review match quality
            print()
            print("📊 Reviewing match quality...")
            print()
            run_script('review_match_quality')
            _pause()

        elif choice == '6':
            # Just show stats, loop back to menu (which reprints them)
            continue

        else:
            print("\n❌ Invalid choice. Please try again.\n")
            _pause("Press Enter to continue...")


if __name__ == "__main__":
    try:
        main()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or a closed stdin: leave quietly instead of dumping a traceback.
        print("\n\n👋 Interrupted. Goodbye!\n")
        sys.exit(0)