import re
import unicodedata
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta
from os import environ
from typing import Iterable, List, Optional
from zoneinfo import ZoneInfo

from dotenv import load_dotenv
from google.oauth2 import service_account
from googleapiclient import discovery
from sqlalchemy.orm import Session as SQLAlchemySession

from db import Session
from logger import log
from models import Dolgozo, MunkaNapok, TavolletNyilvantartas

load_dotenv()

CALENDAR_ID = environ.get(
    'GOOGLE_CALENDAR_ID',
    'qvorb8l2e3826f60mhlqpejc9k@group.calendar.google.com')
SERVICE_ACCOUNT_FILE = environ.get('GOOGLE_SERVICE_ACCOUNT_FILE',
                                   'szabik/service_credentials.json')
SCOPES = ['https://www.googleapis.com/auth/calendar.events.readonly']
BUDAPEST = ZoneInfo('Europe/Budapest')
FULL_DAY_MINUTES = 8 * 60
HALF_DAY_MINUTES = 4 * 60

# Calendar titles use short names, so these aliases anchor parsing to DB IDs.
ACTIVE_ALIASES = {
    'annamari': 22,
    'arpi': 3,
    'bandi': 5,
    'bence': 11,
    'csaba': 14,
    'csabam': 14,
    'dani': 24,
    'dia': 26,
    'gabor': 10,
    'marc': 4,
    'mara': 27,
    'mester': 15,
    'norbi': 17,
    'roland': 21,
    'tibi': 16,
    'zsolti': 50,
}
INACTIVE_ALIASES = {'agi', 'dev', 'don', 'donat', 'ili', 'szeverin', 'zoli'}
HOLIDAY_WORDS = {'dzabi', 'szabi', 'szabadnap', 'szabadsag', 'szabin'}
SICK_WORDS = {'beteg', 'betegallomany', 'betegseg', 'tappenz'}
HALF_DAY_WORDS = {'1/2', 'fel nap', 'felnap'}
IGNORE_PHRASES = {'gyakorlati vizsga', 'kesik', 'nem nyaral', 'vizsgazik'}


@dataclass
class ParsedCalendarAbsence:
  azonosito: Optional[int]
  datum: date
  minutes: int
  absence_type: str
  source_id: str
  summary: str
  match_status: str


def normalize_text(value: str) -> str:
  normalized = unicodedata.normalize('NFKD', value or '')
  ascii_text = ''.join(char for char in normalized
                       if not unicodedata.combining(char))
  ascii_text = re.sub(r'[^\w\s/<>-]', ' ', ascii_text.lower())
  return re.sub(r'\s+', ' ', ascii_text).strip()


def get_calendar_service():
  credentials = service_account.Credentials.from_service_account_file(
      SERVICE_ACCOUNT_FILE, scopes=SCOPES)
  return discovery.build('calendar',
                         'v3',
                         credentials=credentials,
                         cache_discovery=False)


def fetch_calendar_events(start_date: date, end_date: date) -> List[dict]:
  service = get_calendar_service()
  events: List[dict] = []
  page_token = None
  time_min = datetime.combine(start_date, time.min,
                              tzinfo=BUDAPEST).isoformat()
  time_max = datetime.combine(end_date, time.max, tzinfo=BUDAPEST).isoformat()

  while True:
    result = service.events().list(calendarId=CALENDAR_ID,
                                   timeMin=time_min,
                                   timeMax=time_max,
                                   singleEvents=True,
                                   orderBy='startTime',
                                   maxResults=2500,
                                   pageToken=page_token).execute()
    events.extend(result.get('items', []))
    page_token = result.get('nextPageToken')
    if not page_token:
      return events


def classify_title(summary: str) -> Optional[str]:
  title = normalize_text(summary)
  if any(phrase in title for phrase in IGNORE_PHRASES):
    return None
  if any(word in title for word in SICK_WORDS):
    return 'sick_leave'
  if any(word in title for word in HOLIDAY_WORDS):
    return 'holiday'
  if 'nyaral' in title:
    return 'holiday'
  return None


def title_has_half_day(summary: str) -> bool:
  title = normalize_text(summary)
  return any(word in title for word in HALF_DAY_WORDS)


def extract_alias(summary: str) -> Optional[str]:
  title = normalize_text(summary)
  title = re.sub(r'<.*$', ' ', title)
  title = title.replace('-', ' ')
  for word in HOLIDAY_WORDS | SICK_WORDS | HALF_DAY_WORDS | {'nyaral'}:
    title = title.replace(word, ' ')
  title = re.sub(r'\b(de|es|mert|nem|biztos|foldmeres|temetes)\b', ' ', title)
  title = re.sub(r'\s+', ' ', title).strip()
  return title.split()[0] if title else None


def resolve_employee_id(
    summary: str, employees: dict[str, int]) -> tuple[Optional[int], str]:
  alias = extract_alias(summary)
  if not alias:
    return None, 'unmatched'
  if alias in INACTIVE_ALIASES:
    return None, 'inactive'
  if alias in ACTIVE_ALIASES:
    return ACTIVE_ALIASES[alias], 'matched'
  if alias in employees:
    return employees[alias], 'matched'
  return None, 'unmatched'


def get_employee_aliases(session: SQLAlchemySession) -> dict[str, int]:
  employees = session.query(Dolgozo.azonosito, Dolgozo.Nev).all()
  aliases: dict[str, int] = {}
  for azonosito, name in employees:
    if '?' in str(name):
      continue
    normalized_parts = normalize_text(str(name)).split()
    for part in normalized_parts:
      aliases.setdefault(part, azonosito)
  return aliases


def parse_event_dates(event: dict) -> Iterable[tuple[date, int]]:
  start = event.get('start', {})
  end = event.get('end', {})
  summary = event.get('summary', '')

  if 'date' in start:
    current = date.fromisoformat(start['date'])
    end_date = date.fromisoformat(end['date'])
    minutes = HALF_DAY_MINUTES if title_has_half_day(
        summary) else FULL_DAY_MINUTES
    while current < end_date:
      yield current, minutes
      current += timedelta(days=1)
    return

  start_time = datetime.fromisoformat(start['dateTime'])
  end_time = datetime.fromisoformat(end['dateTime'])
  minutes = int((end_time - start_time).total_seconds() // 60)
  if title_has_half_day(summary) or 0 < minutes <= HALF_DAY_MINUTES:
    minutes = HALF_DAY_MINUTES
  else:
    minutes = FULL_DAY_MINUTES
  yield start_time.astimezone(BUDAPEST).date(), minutes


def is_workday(session: SQLAlchemySession, target_date: date) -> bool:
  day_of_year = target_date.timetuple().tm_yday
  workday_info = session.query(MunkaNapok).filter(
      MunkaNapok.napszama == day_of_year).one_or_none()
  if workday_info:
    return workday_info.tipus == 'munkanap'
  # Future years may not be loaded in Munka_napok yet; weekdays are the safest fallback.
  return target_date.weekday() < 5


def parse_calendar_events(
    events: Iterable[dict],
    session: SQLAlchemySession,
    workdays_only: bool = True,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None) -> List[ParsedCalendarAbsence]:
  employee_aliases = get_employee_aliases(session)
  parsed: List[ParsedCalendarAbsence] = []

  for event in events:
    summary = event.get('summary', '')
    absence_type = classify_title(summary)
    if not absence_type:
      continue
    azonosito, match_status = resolve_employee_id(summary, employee_aliases)
    for event_date, minutes in parse_event_dates(event):
      if start_date and event_date < start_date:
        continue
      if end_date and event_date > end_date:
        continue
      if workdays_only and not is_workday(session, event_date):
        continue
      parsed.append(
          ParsedCalendarAbsence(azonosito=azonosito,
                                datum=event_date,
                                minutes=minutes,
                                absence_type=absence_type,
                                source_id=event.get('id', ''),
                                summary=summary,
                                match_status=match_status))
  return parsed


def upsert_calendar_absences(
    session: SQLAlchemySession,
    parsed_absences: Iterable[ParsedCalendarAbsence]) -> int:
  changed = 0
  for absence in parsed_absences:
    if absence.azonosito is None:
      log(f"Calendar absence not matched: {absence.summary}", 'WARNING')
      continue
    record = session.query(TavolletNyilvantartas).filter(
        TavolletNyilvantartas.source == 'google_calendar',
        TavolletNyilvantartas.source_id == absence.source_id,
        TavolletNyilvantartas.azonosito == absence.azonosito,
        TavolletNyilvantartas.datum == absence.datum).one_or_none()
    if record is None:
      record = TavolletNyilvantartas(azonosito=absence.azonosito,
                                     datum=absence.datum,
                                     minutes=absence.minutes,
                                     absence_type=absence.absence_type,
                                     source='google_calendar',
                                     source_id=absence.source_id,
                                     summary=absence.summary,
                                     status='active',
                                     match_status=absence.match_status)
    else:
      record.minutes = absence.minutes
      record.absence_type = absence.absence_type
      record.summary = absence.summary
      record.status = 'active'
      record.match_status = absence.match_status
    session.add(record)
    changed += 1
  return changed


def cancel_missing_calendar_absences(
    session: SQLAlchemySession,
    parsed_absences: Iterable[ParsedCalendarAbsence], start_date: date,
    end_date: date) -> int:
  current_keys = {(absence.source_id, absence.azonosito, absence.datum)
                  for absence in parsed_absences
                  if absence.azonosito is not None}
  active_records = session.query(TavolletNyilvantartas).filter(
      TavolletNyilvantartas.source == 'google_calendar',
      TavolletNyilvantartas.status == 'active', TavolletNyilvantartas.datum
      >= start_date, TavolletNyilvantartas.datum <= end_date).all()

  cancelled = 0
  for record in active_records:
    key = (record.source_id, record.azonosito, record.datum)
    if key in current_keys:
      continue
    # Keep old rows for audit, but exclude removed calendar events from totals.
    record.status = 'cancelled'
    session.add(record)
    cancelled += 1
  return cancelled


def sync_calendar_absences(start_date: Optional[date] = None,
                           end_date: Optional[date] = None) -> None:
  today = datetime.now(BUDAPEST).date()
  start_date = start_date or date(today.year, 1, 1)
  end_date = end_date or date(today.year, 12, 31)
  events = fetch_calendar_events(start_date, end_date)
  with Session() as session:
    parsed = parse_calendar_events(events,
                                   session,
                                   start_date=start_date,
                                   end_date=end_date)
    changed = upsert_calendar_absences(session, parsed)
    cancelled = cancel_missing_calendar_absences(session, parsed, start_date,
                                                 end_date)
    session.commit()
  log(
      f"Synced {changed} Google Calendar absence rows, cancelled {cancelled} stale rows.",
      'INFO')


def dry_run_calendar_absences(start_date: Optional[date] = None,
                              end_date: Optional[date] = None) -> None:
  today = datetime.now(BUDAPEST).date()
  start_date = start_date or date(today.year, 1, 1)
  end_date = end_date or date(today.year, 12, 31)
  events = fetch_calendar_events(start_date, end_date)
  with Session() as session:
    parsed = parse_calendar_events(events,
                                   session,
                                   start_date=start_date,
                                   end_date=end_date)
  for absence in parsed:
    print(
        f"{absence.datum} | {absence.azonosito} | {absence.absence_type} | {absence.minutes} | {absence.match_status} | {absence.summary}"
    )


if __name__ == '__main__':
  dry_run_calendar_absences()
