# ProfileMatching/main.py

#!/usr/bin/env python3
"""
Twitter-Telegram Profile Matching System
Main menu for finding candidates and verifying matches with LLM
"""

import sys
import os
import subprocess
import psycopg2

# Make this file's own directory and the sibling ``../src`` directory
# importable. Both entries are prepended in one slice assignment; ``../src``
# is listed first, so it ends up ahead of this directory on ``sys.path``.
sys.path[:0] = [
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'src'),
    os.path.dirname(os.path.abspath(__file__)),
]

# Keyword arguments handed to ``psycopg2.connect`` for the 'telegram_contacts'
# database.
DB_CONFIG = dict(
    dbname='telegram_contacts',
    user='andrewjiang',
    host='localhost',
    port=5432,
)

# Connection settings of the same shape for the 'twitter_data' database.
TWITTER_DB_CONFIG = dict(
    dbname='twitter_data',
    user='andrewjiang',
    host='localhost',
    port=5432,
)


def get_stats():
    """Get current matching statistics.

    Connects to the database described by ``DB_CONFIG`` and runs three
    aggregate queries: one over ``twitter_match_candidates`` and two over
    ``twitter_telegram_matches``.

    Returns:
        dict: Statistics with the following keys.

        From ``twitter_match_candidates``:
            ``total_users`` (distinct ``telegram_user_id`` values),
            ``total_candidates`` (row count),
            ``processed_candidates`` (rows with ``llm_processed = TRUE``),
            ``pending_candidates`` (rows with ``llm_processed = FALSE``).

        From ``twitter_telegram_matches``:
            ``total_matches`` (row count),
            ``avg_confidence`` (average ``final_confidence``; ``0`` when the
            average is NULL or zero),
            ``high_conf`` (``final_confidence >= 0.90``),
            ``med_conf`` (``0.80 <= final_confidence < 0.90``),
            ``low_conf`` (``0.70 <= final_confidence < 0.80``),
            ``users_with_matches`` (distinct ``telegram_user_id`` values).

        Rows with a confidence below 0.70 are counted in ``total_matches``
        but fall into none of the three buckets.

    Raises:
        Exceptions raised by ``psycopg2`` while connecting or querying are
        not caught here and propagate to the caller.
    """
    stats = {}

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        # The cursor context manager closes the cursor on exit, and the
        # ``finally`` below closes the connection, even if a query fails.
        with conn.cursor() as cur:
            # Candidates stats
            cur.execute("""
                SELECT
                    COUNT(DISTINCT telegram_user_id) as total_users,
                    COUNT(*) as total_candidates,
                    COUNT(*) FILTER (WHERE llm_processed = TRUE) as processed_candidates,
                    COUNT(*) FILTER (WHERE llm_processed = FALSE) as pending_candidates
                FROM twitter_match_candidates
            """)
            row = cur.fetchone()
            stats['total_users'] = row[0]
            stats['total_candidates'] = row[1]
            stats['processed_candidates'] = row[2]
            stats['pending_candidates'] = row[3]

            # Matches stats
            cur.execute("""
                SELECT
                    COUNT(*) as total_matches,
                    AVG(final_confidence) as avg_confidence,
                    COUNT(*) FILTER (WHERE final_confidence >= 0.90) as high_conf,
                    COUNT(*) FILTER (WHERE final_confidence >= 0.80 AND final_confidence < 0.90) as med_conf,
                    COUNT(*) FILTER (WHERE final_confidence >= 0.70 AND final_confidence < 0.80) as low_conf
                FROM twitter_telegram_matches
            """)
            row = cur.fetchone()
            stats['total_matches'] = row[0]
            # AVG over zero rows is NULL (None); report 0 so callers can
            # format the value as a number.
            stats['avg_confidence'] = row[1] or 0
            stats['high_conf'] = row[2]
            stats['med_conf'] = row[3]
            stats['low_conf'] = row[4]

            # Users with matches
            cur.execute("""
                SELECT COUNT(DISTINCT telegram_user_id)
                FROM twitter_telegram_matches
            """)
            stats['users_with_matches'] = cur.fetchone()[0]
    finally:
        conn.close()

    return stats


def print_header():
    """Print the application banner: a blank line, the title between two
    80-character rules, and a trailing blank line."""
    rule = "=" * 80
    banner = ["", rule, "🔗 Twitter-Telegram Profile Matching System", rule, ""]
    print("\n".join(banner))


def print_stats():
    """Fetch the statistics via ``get_stats`` and print them as a report.

    The report has a "Candidates" section and a "Verified Matches" section,
    framed by 80-character dashed rules. Counts are printed with thousands
    separators and the average confidence with two decimals.
    """
    s = get_stats()
    rule = "-" * 80

    report = [
        "📊 Current Statistics:",
        rule,
        "Candidates:",
        f"  • Users with candidates: {s['total_users']:,}",
        f"  • Total candidates found: {s['total_candidates']:,}",
        f"  • Processed by LLM: {s['processed_candidates']:,}",
        f"  • Pending verification: {s['pending_candidates']:,}",
        "",
        "Verified Matches:",
        f"  • Users with matches: {s['users_with_matches']:,}",
        f"  • Total matches: {s['total_matches']:,}",
        f"  • Average confidence: {s['avg_confidence']:.2f}",
        f"  • High confidence (90%+): {s['high_conf']:,}",
        f"  • Medium confidence (80-89%): {s['med_conf']:,}",
        f"  • Low confidence (70-79%): {s['low_conf']:,}",
        rule,
        "",
    ]
    print("\n".join(report))


def run_script(script_name, *args):
    """Run a sibling Python script with arguments and wait for it to finish.

    Args:
        script_name: Script file name without the ``.py`` suffix; it is
            resolved relative to the directory containing this file.
        *args: Command-line arguments passed to the script unchanged.

    Returns:
        int: The exit code of the child process.
    """
    script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{script_name}.py")
    # Launch the child with the interpreter that is running this program, so
    # it uses the same environment and does not depend on a ``python3.10``
    # binary being on PATH. ``sys.executable`` can be empty when Python
    # cannot determine its own path; fall back to the previous hard-coded
    # name in that case.
    interpreter = sys.executable or 'python3.10'
    cmd = [interpreter, script_path] + list(args)
    completed = subprocess.run(cmd)
    return completed.returncode


def main():
    """Run the interactive menu loop until the user chooses to exit.

    Each pass prints the header and the current statistics, shows the menu,
    reads one choice from standard input and launches the corresponding
    helper script through ``run_script``. Choice ``0`` ends the loop; choice
    ``6`` starts the next pass immediately, which reprints the statistics.
    """
    menu_text = "\n".join((
        "📋 Main Menu:",
        "",
        "STEP 1: Find Candidates",
        "  1. Find Twitter candidates (threaded, RECOMMENDED)",
        "  2. Find Twitter candidates (single-threaded)",
        "",
        "STEP 2: Verify with LLM",
        "  3. Verify matches with LLM (async, RECOMMENDED)",
        "  4. Verify matches with LLM (test mode - 50 users)",
        "",
        "Analysis & Review",
        "  5. Review match quality",
        "  6. Show statistics only",
        "",
        "  0. Exit",
        "",
    ))

    def announce(message):
        # Every action starts with its title framed by blank lines.
        print()
        print(message)
        print()

    while True:
        print_header()
        print_stats()
        print(menu_text)

        selection = input("👉 Enter your choice: ").strip()

        if selection == '0':
            print("\n👋 Goodbye!\n")
            return

        if selection == '6':
            # Nothing to run: the next pass reprints the statistics.
            continue

        if selection == '1':
            announce("🔍 Finding Twitter candidates (threaded mode)...")
            limit = input("👉 How many contacts? (press Enter for all): ").strip()
            worker_count = input("👉 Number of worker threads (default: 8): ").strip() or '8'
            # An empty answer means "all contacts": omit --limit entirely.
            limit_args = ['--limit', limit] if limit else []
            run_script('find_twitter_candidates_threaded', *limit_args, '--workers', worker_count)

        elif selection == '2':
            announce("🔍 Finding Twitter candidates (single-threaded mode)...")
            limit = input("👉 How many contacts? (press Enter for all): ").strip()
            limit_args = ['--limit', limit] if limit else []
            run_script('find_twitter_candidates', *limit_args)

        elif selection == '3':
            announce("🤖 Verifying matches with LLM (async mode)...")
            concurrency = input("👉 Concurrent requests (default: 100): ").strip() or '100'
            run_script('verify_twitter_matches_v2', '--verbose', '--concurrent', concurrency)

        elif selection == '4':
            announce("🧪 Test mode: Verifying 50 users with LLM...")
            run_script('verify_twitter_matches_v2', '--test', '--limit', '50', '--verbose', '--concurrent', '10')

        elif selection == '5':
            announce("📊 Reviewing match quality...")
            run_script('review_match_quality')

        else:
            print("\n❌ Invalid choice. Please try again.\n")
            input("Press Enter to continue...")
            continue

        # Shared by actions 1-5: wait for the user before redrawing the menu.
        input("\n✅ Press Enter to continue...")


if __name__ == "__main__":
    # Script entry point: run the menu and exit cleanly when the user aborts.
    try:
        main()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C raises KeyboardInterrupt; Ctrl-D or a closed stdin makes
        # input() raise EOFError. Treat both as a request to quit instead of
        # ending with a traceback.
        print("\n\n👋 Interrupted. Goodbye!\n")
        sys.exit(0)