#!/usr/bin/env npx ts-node

/**
 * Keyword Analyzer and Clustering Tool for Programmatic SEO
 *
 * Usage:
 *   npx ts-node analyze-keywords.ts --input keywords.csv --output clusters.json
 *   npx ts-node analyze-keywords.ts --input keywords.csv --min-volume 100
 */

import * as fs from 'fs'

// Types
/** One keyword row, as produced by `parseCSV`. */
interface Keyword {
  /** The search phrase, read from the CSV `keyword` or `query` column. */
  keyword: string
  /** Value of the `volume` / `search_volume` column; 0 when the column is absent or the value cannot be parsed as an integer. */
  volume: number
  /** Value of the `difficulty` / `kd` column; 0 when the column is absent or the value cannot be parsed as an integer. */
  difficulty: number
  /** Intent label returned by `detectIntent` for this keyword; `parseCSV` always sets it. */
  intent?: string
}

/** A group of related keywords built by `clusterKeywords` around one seed keyword. */
interface Cluster {
  /** Base phrase of the seed keyword (per `extractModifier`), or the seed keyword itself when that base is empty. */
  name: string
  /** The seed keyword: the highest-volume keyword that was still unassigned when the cluster was started. */
  primary: string
  /** All members of the cluster, seed first. */
  keywords: Keyword[]
  /** Sum of `volume` over all members. */
  totalVolume: number
  /** Mean `difficulty` over all members, rounded to the nearest integer. */
  avgDifficulty: number
  /** Intent detected from the seed keyword only (not a vote across members). */
  intent: string
  /** Template identifier looked up in `templateMapping` for `intent`. */
  template: string
}

// Intent detection patterns.
// Declaration order is significant: detectIntent returns the FIRST intent whose
// pattern list matches, so e.g. "sign in" is navigational before "in " can make
// it local.
const intentPatterns = {
  informational: ['how to', 'what is', 'guide', 'tutorial', 'learn', 'tips', 'ideas', 'examples'],
  transactional: ['buy', 'price', 'cost', 'best', 'top', 'review', 'compare', 'cheap', 'deal'],
  navigational: ['login', 'sign in', 'official', 'website'],
  local: ['near me', 'in ', ' at ', 'nearby', 'local']
}

// Template mapping based on intent
const templateMapping: Record<string, string> = {
  informational: 'how-to-guide',
  transactional: 'product-list',
  navigational: 'brand-page',
  local: 'city-guide'
}

// Utility functions

/**
 * Converts one intent pattern into regular-expression source.
 *
 * The pattern text is matched literally (regex metacharacters are escaped).
 * A pattern that begins with a word character must additionally start at a
 * word boundary, so 'top' matches "top 10" and "topics" but not "laptop", and
 * 'in ' matches "hotels in paris" but not "protein powder". Patterns that
 * begin with a space (' at ') already carry their own left delimiter and are
 * used unchanged.
 */
function intentPatternSource(pattern: string): string {
  const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  return /^\w/.test(pattern) ? `\\b${escaped}` : escaped
}

// One precompiled alternation per intent, in the declaration order of
// intentPatterns, so detectIntent does not rebuild regexes on every call.
const intentMatchers: Array<[string, RegExp]> = Object.entries(intentPatterns).map(
  ([intent, patterns]): [string, RegExp] => [
    intent,
    new RegExp(patterns.map(intentPatternSource).join('|'))
  ]
)

/**
 * Classifies a keyword's search intent.
 *
 * The keyword is lower-cased and tested against each intent's patterns in the
 * declaration order of `intentPatterns`; the first intent with a match wins.
 *
 * @param keyword - The search phrase to classify.
 * @returns One of the keys of `intentPatterns`; `'informational'` when no
 *   pattern matches.
 */
function detectIntent(keyword: string): string {
  const lower = keyword.toLowerCase()

  for (const [intent, matcher] of intentMatchers) {
    if (matcher.test(lower)) {
      return intent
    }
  }
  return 'informational' // default
}

/**
 * Splits a keyword into a base phrase and a modifier.
 *
 * Two patterns are recognised, checked in this order:
 *  1. a trailing "in <letters/spaces>" phrase - the text after "in" becomes the
 *     modifier and everything before "in" becomes the base;
 *  2. a leading "best " - the modifier is the literal string 'best' and the
 *     remainder is the base.
 *
 * @param keyword - The search phrase to split.
 * @returns `{ base, modifier }`; when neither pattern applies, `base` is the
 *   keyword unchanged and `modifier` is the empty string.
 */
function extractModifier(keyword: string): { base: string; modifier: string } {
  // Extract city/location. The leading \b makes "in" a whole word, so
  // "protein shake" is not split into "prote" + "shake".
  const locationMatch = /\bin\s+([a-zA-Z\s]+)$/i.exec(keyword)
  if (locationMatch) {
    return {
      // Cut at the match position: the match is anchored to the end of the
      // string, whereas String.replace would remove the FIRST occurrence of
      // the matched text, which can sit earlier in the keyword.
      base: keyword.slice(0, locationMatch.index).trim(),
      modifier: locationMatch[1].trim()
    }
  }

  // Extract "best X" pattern
  const bestMatch = /^best\s+(.+)/i.exec(keyword)
  if (bestMatch) {
    return {
      base: bestMatch[1].trim(),
      modifier: 'best'
    }
  }

  return { base: keyword, modifier: '' }
}

/**
 * Jaccard similarity of two phrases over their sets of lower-cased,
 * whitespace-separated words: |A intersect B| / |A union B|.
 *
 * Empty tokens produced by leading, trailing or repeated whitespace are
 * discarded, so padding never counts as a shared "word".
 *
 * @param a - First phrase.
 * @param b - Second phrase.
 * @returns A value in [0, 1]; 1 when both phrases contain no words at all.
 */
function similarity(a: string, b: string): number {
  const tokenize = (text: string): Set<string> =>
    new Set(text.toLowerCase().split(/\s+/).filter(word => word !== ''))

  const wordsA = tokenize(a)
  const wordsB = tokenize(b)

  let shared = 0
  for (const word of wordsA) {
    if (wordsB.has(word)) shared++
  }

  // |A union B| = |A| + |B| - |A intersect B|
  const unionSize = wordsA.size + wordsB.size - shared

  // Two word-less phrases are treated as identical (also avoids 0 / 0).
  return unionSize === 0 ? 1 : shared / unionSize
}

/**
 * Splits a single CSV record into trimmed field values.
 *
 * A field whose first non-blank character is a double quote is read as a
 * quoted field: commas inside it are kept and a doubled quote ("") stands for
 * one literal quote. A quote appearing later inside an unquoted field is kept
 * as an ordinary character. Quoted fields spanning several lines are not
 * supported.
 */
function splitCSVLine(line: string): string[] {
  const fields: string[] = []
  let current = ''
  let inQuotes = false

  for (let i = 0; i < line.length; i++) {
    const ch = line[i]
    if (inQuotes) {
      if (ch !== '"') {
        current += ch
      } else if (line[i + 1] === '"') {
        current += '"' // escaped quote
        i++
      } else {
        inQuotes = false
      }
    } else if (ch === '"' && current.trim() === '') {
      current = ''
      inQuotes = true
    } else if (ch === ',') {
      fields.push(current.trim())
      current = ''
    } else {
      current += ch
    }
  }

  fields.push(current.trim())
  return fields
}

/**
 * Parses keyword CSV text into `Keyword` records.
 *
 * The first line is the header. Columns are located by case-insensitive name:
 * `keyword` or `query` (required), `volume` or `search_volume` (optional),
 * `difficulty` or `kd` (optional). Rows whose keyword cell is missing or empty
 * are skipped. Numeric cells are read as base-10 integers after removing
 * thousands separators; a missing column or unparsable cell yields 0. Each
 * record's `intent` is filled in with `detectIntent`.
 *
 * @param content - Full CSV text (LF or CRLF line endings).
 * @returns The parsed keywords in file order.
 * @throws Error if the header has neither a "keyword" nor a "query" column.
 */
function parseCSV(content: string): Keyword[] {
  const lines = content.trim().split(/\r?\n/)
  const headers = splitCSVLine(lines[0]).map(h => h.toLowerCase())

  const keywordIndex = headers.findIndex(h => h === 'keyword' || h === 'query')
  const volumeIndex = headers.findIndex(h => h === 'volume' || h === 'search_volume')
  const difficultyIndex = headers.findIndex(h => h === 'difficulty' || h === 'kd')

  if (keywordIndex === -1) {
    throw new Error('CSV must have a "keyword" or "query" column')
  }

  // "1,200" -> 1200; anything unparsable (or a short row) -> 0
  const toInteger = (raw: string | undefined): number =>
    parseInt((raw || '').replace(/,/g, ''), 10) || 0

  const keywords: Keyword[] = []
  for (const line of lines.slice(1)) {
    const values = splitCSVLine(line)
    const keyword = values[keywordIndex]

    // Skip blank or short rows BEFORE intent detection: detectIntent would
    // throw on an undefined keyword.
    if (!keyword) continue

    keywords.push({
      keyword,
      volume: volumeIndex !== -1 ? toInteger(values[volumeIndex]) : 0,
      difficulty: difficultyIndex !== -1 ? toInteger(values[difficultyIndex]) : 0,
      intent: detectIntent(keyword)
    })
  }

  return keywords
}

/**
 * Greedily groups keywords into clusters.
 *
 * Keywords are visited in descending volume order. Each keyword not yet
 * assigned starts a new cluster as its seed; every later unassigned keyword
 * joins that cluster when either
 *  - its `similarity` to the seed keyword is at least `threshold`, or
 *  - it has the same non-empty base phrase (per `extractModifier`, compared
 *    case-insensitively) as the seed.
 * Candidates are compared with the seed only, never with other members.
 *
 * @param keywords - Keywords to cluster; the array is not modified.
 * @param threshold - Minimum similarity to the seed for membership.
 * @returns Clusters sorted by total volume, highest first. Each cluster's
 *   intent and template are derived from its seed keyword.
 */
function clusterKeywords(keywords: Keyword[], threshold: number = 0.3): Cluster[] {
  const clusters: Cluster[] = []
  const assigned = new Set<number>()

  // Sort by volume (descending) to use high-volume keywords as cluster seeds
  const sorted = [...keywords].sort((a, b) => b.volume - a.volume)

  // extractModifier depends only on the keyword itself, so run it once per
  // keyword instead of once per (seed, candidate) pair.
  const parts = sorted.map(k => extractModifier(k.keyword))
  // Lower-cased so base matching is case-insensitive, like similarity().
  const bases = parts.map(p => p.base.toLowerCase())

  for (let i = 0; i < sorted.length; i++) {
    if (assigned.has(i)) continue

    const seed = sorted[i]
    const seedBase = bases[i]
    const members: Keyword[] = [seed]
    assigned.add(i)

    // Find similar keywords
    for (let j = i + 1; j < sorted.length; j++) {
      if (assigned.has(j)) continue

      const candidate = sorted[j]

      // An empty base means the keyword was nothing but a modifier; two such
      // keywords do not share a topic, so empty bases never count as equal.
      const sameBase = seedBase !== '' && seedBase === bases[j]

      if (sameBase || similarity(seed.keyword, candidate.keyword) >= threshold) {
        members.push(candidate)
        assigned.add(j)
      }
    }

    // Calculate cluster stats
    const totalVolume = members.reduce((sum, k) => sum + k.volume, 0)
    const avgDifficulty = Math.round(members.reduce((sum, k) => sum + k.difficulty, 0) / members.length)
    const primaryIntent = detectIntent(seed.keyword)

    clusters.push({
      name: parts[i].base || seed.keyword,
      primary: seed.keyword,
      keywords: members,
      totalVolume,
      avgDifficulty,
      intent: primaryIntent,
      template: templateMapping[primaryIntent]
    })
  }

  return clusters.sort((a, b) => b.totalVolume - a.totalVolume)
}

// Priority scoring

/**
 * Attaches a 0-100 score and a priority label to every cluster.
 *
 * Volume and difficulty are normalised against the largest value found in
 * `clusters`: volumeScore = totalVolume / maxVolume and
 * difficultyScore = 1 - avgDifficulty / maxDifficulty. The combined score is
 * 0.6 * volumeScore + 0.4 * difficultyScore; a score of 0.7 or more is 'high',
 * 0.4 or more is 'medium', anything lower is 'low'.
 *
 * When no cluster has any volume, every volumeScore is 0; when no cluster has
 * any difficulty (e.g. the CSV had no difficulty column), every
 * difficultyScore is 1. This keeps the score a finite number instead of NaN.
 *
 * @param clusters - Clusters to score; input order is preserved.
 * @returns A copy of each cluster extended with `score` (integer 0-100) and
 *   `priority` ('high' | 'medium' | 'low').
 */
function scoreClusters(clusters: Cluster[]): Array<Cluster & { score: number; priority: string }> {
  // reduce rather than Math.max(...values): no argument-count limit on large
  // inputs, and an empty list gives 0 instead of -Infinity.
  const maxVolume = clusters.reduce((max, c) => Math.max(max, c.totalVolume), 0)
  const maxDifficulty = clusters.reduce((max, c) => Math.max(max, c.avgDifficulty), 0)

  return clusters.map(cluster => {
    // Normalize scores (0-1), guarding the 0 / 0 case
    const volumeScore = maxVolume > 0 ? cluster.totalVolume / maxVolume : 0
    const difficultyScore = maxDifficulty > 0 ? 1 - cluster.avgDifficulty / maxDifficulty : 1

    // Combined score (weight volume higher)
    const score = (volumeScore * 0.6) + (difficultyScore * 0.4)

    let priority: string
    if (score >= 0.7) priority = 'high'
    else if (score >= 0.4) priority = 'medium'
    else priority = 'low'

    return { ...cluster, score: Math.round(score * 100), priority }
  })
}

// Main function

/**
 * CLI entry point.
 *
 * Reads the CSV named by `--input`, optionally drops keywords below
 * `--min-volume`, clusters the rest using `--threshold` (default 0.3), scores
 * the clusters, prints the ten highest-scoring clusters and the intent
 * distribution, and writes the full result as JSON to `--output` (default
 * `clusters.json`).
 *
 * Exits with status 1 and a message when `--input` is missing, when an option
 * is given without a value, or when a numeric option is not a valid number.
 * File-system and parse errors propagate as a rejected promise.
 */
async function main() {
  const usage = 'Usage: npx ts-node analyze-keywords.ts --input <file.csv> [--output <file.json>] [--min-volume <number>] [--threshold <0-1>]'
  const args = process.argv.slice(2)

  // Prints a message plus the usage line and terminates with status 1.
  const fail = (message: string): never => {
    console.error(message)
    console.error(usage)
    process.exit(1)
  }

  // Returns the token following `flag`, or undefined when the flag is absent.
  // A flag that is last on the command line, or directly followed by another
  // flag, is reported instead of silently becoming undefined / NaN.
  const optionValue = (flag: string): string | undefined => {
    const index = args.indexOf(flag)
    if (index === -1) return undefined
    const value = args[index + 1]
    if (value === undefined || value.startsWith('--')) {
      return fail(`Missing value for ${flag}`)
    }
    return value
  }

  const inputFile = optionValue('--input')
  if (inputFile === undefined) {
    console.error(usage)
    process.exit(1)
  }

  const outputFile = optionValue('--output') || 'clusters.json'

  const minVolumeArg = optionValue('--min-volume')
  const minVolume = minVolumeArg !== undefined ? parseInt(minVolumeArg, 10) : 0
  if (Number.isNaN(minVolume)) {
    fail(`Invalid --min-volume value: ${minVolumeArg}`)
  }

  const thresholdArg = optionValue('--threshold')
  const threshold = thresholdArg !== undefined ? parseFloat(thresholdArg) : 0.3
  if (Number.isNaN(threshold) || threshold < 0 || threshold > 1) {
    fail(`Invalid --threshold value (expected a number between 0 and 1): ${thresholdArg}`)
  }

  // Read and parse keywords
  console.log(`\n📊 Reading keywords from ${inputFile}...`)
  const content = fs.readFileSync(inputFile, 'utf-8')
  let keywords = parseCSV(content)

  console.log(`   Found ${keywords.length} keywords`)

  // Filter by minimum volume
  if (minVolume > 0) {
    keywords = keywords.filter(k => k.volume >= minVolume)
    console.log(`   After volume filter (>=${minVolume}): ${keywords.length} keywords`)
  }

  // Cluster keywords
  console.log(`\n🔗 Clustering keywords (threshold: ${threshold})...`)
  const clusters = clusterKeywords(keywords, threshold)
  console.log(`   Created ${clusters.length} clusters`)

  // Score and prioritize
  const scored = scoreClusters(clusters)

  // Output summary
  console.log('\n📋 Top Clusters by Priority:\n')
  console.log('Priority | Volume | Difficulty | Keywords | Primary Keyword')
  console.log('---------|--------|------------|----------|----------------')

  // `scored` is ordered by total volume; the table is headed "by Priority", so
  // rank a copy by score. The sort is stable, so ties keep their volume order,
  // and the JSON output below keeps the original ordering.
  const ranked = [...scored].sort((a, b) => b.score - a.score)

  ranked.slice(0, 10).forEach(c => {
    const priority = c.priority.padEnd(8)
    const volume = String(c.totalVolume).padStart(6)
    const difficulty = String(c.avgDifficulty).padStart(10)
    const count = String(c.keywords.length).padStart(8)
    console.log(`${priority} | ${volume} | ${difficulty} | ${count} | ${c.primary}`)
  })

  // Write output
  const output = {
    generated: new Date().toISOString(),
    totalKeywords: keywords.length,
    totalClusters: clusters.length,
    clusters: scored
  }

  fs.writeFileSync(outputFile, JSON.stringify(output, null, 2))
  console.log(`\n✅ Clusters saved to: ${outputFile}`)

  // Summary stats
  const byIntent = scored.reduce((acc, c) => {
    acc[c.intent] = (acc[c.intent] || 0) + 1
    return acc
  }, {} as Record<string, number>)

  console.log('\n📈 Intent Distribution:')
  Object.entries(byIntent).forEach(([intent, count]) => {
    console.log(`   ${intent}: ${count} clusters`)
  })
}

// Run the CLI. On failure, print the error and set a non-zero exit status so
// shells and CI pipelines can detect it (logging alone would still exit 0).
main().catch(error => {
  console.error(error)
  process.exitCode = 1
})
