diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..be1a6e7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,60 @@
+# Dockerfile
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1 \
+ PIP_NO_CACHE_DIR=1 \
+ DEBIAN_FRONTEND=noninteractive \
+ TZ=America/New_York
+
+# System deps for Chromium/chromedriver + rendering & lxml
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ chromium \
+ chromium-driver \
+ ca-certificates \
+ fonts-liberation \
+ fonts-dejavu \
+ libx11-6 \
+ libxcomposite1 \
+ libxdamage1 \
+ libxext6 \
+ libxfixes3 \
+ libxrandr2 \
+ libgbm1 \
+ libgtk-3-0 \
+ libnss3 \
+ libasound2 \
+ libatk-bridge2.0-0 \
+ libatk1.0-0 \
+ libcairo2 \
+ libpango-1.0-0 \
+ tzdata \
+ build-essential \
+ libxml2-dev \
+ libxslt1-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+# Ensure Chromium is on a known path
+ENV CHROME_BIN=/usr/bin/chromium \
+ CHROMEDRIVER=/usr/bin/chromedriver
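+# CHROME_BIN/CHROMEDRIVER are a convention for the Python code rather than something
+# Chromium reads itself. A minimal sketch of how a Selenium step could consume them
+# (illustrative only; the src/ scripts currently rely on great_tables' built-in
+# web_driver="chrome" lookup):
+#
+#   import os
+#   from selenium import webdriver
+#   from selenium.webdriver.chrome.service import Service
+#   opts = webdriver.ChromeOptions()
+#   opts.binary_location = os.environ["CHROME_BIN"]
+#   opts.add_argument("--headless=new")
+#   browser = webdriver.Chrome(service=Service(os.environ["CHROMEDRIVER"]), options=opts)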
+
+WORKDIR /app
+
+# Copy deps first for better caching
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+# Copy the app
+COPY run.py ./run.py
+COPY entrypoint.sh ./entrypoint.sh
+COPY src ./src
+COPY config ./config
+
+# Make sure data/ exists at runtime (also volume-mounted by compose)
+RUN mkdir -p /app/data && chmod +x /app/entrypoint.sh
+
+# Non-root user (optional)
+RUN useradd -ms /bin/bash appuser && chown -R appuser:appuser /app
+USER appuser
+
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/cronfile b/cronfile
new file mode 100644
index 0000000..ddf3293
--- /dev/null
+++ b/cronfile
@@ -0,0 +1,2 @@
+# Run epg-runner every 5 minutes
+*/5 * * * * docker compose run --rm epg-runner
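+# Note: cron does not run from the project directory, so a system-wide crontab should
+# point compose at the file explicitly. The path below is an assumption -- adjust it to
+# wherever this repo is checked out:
+#   */5 * * * * docker compose -f /opt/epg/docker-compose.yml run --rm epg-runner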
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..063d89d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,16 @@
+version: "3.8"
+services:
+ epg-runner:
+ build: .
+ image: epg-runner:latest
+ container_name: epg-runner
+ environment:
+ TZ: America/New_York
+ XML_URL: ""
+ MLB_SCHED_URL: ""
+ EPL_SCHED_URL: ""
+ UFC_SCHED_URL: ""
+ volumes:
+ - ./data:/app/data
+ # Don’t start automatically, only when scheduler calls it
+ restart: "no"
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..dab9bce
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Create data dir if missing (also persisted via volume)
+mkdir -p /app/data
+
+# Helpful debug: show versions
+python --version
+echo "Chromium: $(chromium --version || true)"
+echo "Chromedriver: $(chromedriver --version || true)"
+
+# Run your task
+exec python /app/run.py
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..abe9a90
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+beautifulsoup4>=4.12
+pandas>=2.2
+lxml>=5.2
+requests>=2.32
+cloudscraper>=1.2
+pytz>=2024.1
+Pillow>=10.4
+great-tables>=0.10.0
+selenium>=4.25
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..782356b
--- /dev/null
+++ b/run.py
@@ -0,0 +1,50 @@
+# run.py
+## Download EPG ##
+import os
+import subprocess
+import sys
+import time
+import requests
+from pathlib import Path
+from datetime import datetime, timedelta
+
+xml_url = os.getenv("XML_URL")
+
+DATA_DIR = Path("data")
+DATA_DIR.mkdir(parents=True, exist_ok=True)
+local_path = DATA_DIR / "epg.xml"
+
+def download_epg():
+ try:
+ resp = requests.get(xml_url, timeout=30)
+ resp.raise_for_status()
+ local_path.write_bytes(resp.content)
+ print(f"EPG saved to {local_path}")
+ except Exception as e:
+ print(f"Failed to download EPG: {e}", file=sys.stderr)
+ # If EPG is required for later steps, you may want to sys.exit(1)
+
+download_epg()
+
+def run(mod):
+    print(mod.upper())
+    # subprocess.call returns the module's real exit code (os.system returns a raw wait status)
+    rc = subprocess.call([sys.executable, "-m", f"src.{mod}"])
+    if rc != 0:
+        print(f"ERROR in {mod} (exit {rc})", file=sys.stderr)
+
+for mod in ["mlb", "epl", "ufc"]:
+ run(mod)
+
+##################
+##### SLEEPY #####
+##################
+
+print(datetime.now())
+now = datetime.now()
+# Sleep until 7:30 AM tomorrow
+manana = (now + timedelta(days=1)).replace(hour=7, minute=30, second=0, microsecond=0)
+sleep_seconds = (manana - now).total_seconds()
+print(f"sleeping until {manana} ({sleep_seconds:.0f}s)")
+time.sleep(sleep_seconds)
+
+print(datetime.now())
+
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/epl.py b/src/epl.py
new file mode 100644
index 0000000..ef10418
--- /dev/null
+++ b/src/epl.py
@@ -0,0 +1,191 @@
+from bs4 import BeautifulSoup
+import pandas as pd
+import re
+import os
+import requests
+import cloudscraper
+import pytz
+import xml.etree.ElementTree as ET
+from io import StringIO
+from datetime import date
+from great_tables import GT, md
+
+epl_sched_url = os.getenv("EPL_SCHED_URL")
+driver = 'chrome'
+
+# Request headers for the schedule page
+headers = {
+ 'Access-Control-Allow-Origin': '*',
+ 'Access-Control-Allow-Methods': 'GET',
+ 'Access-Control-Allow-Headers': 'Content-Type',
+ 'Access-Control-Max-Age': '3600',
+ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
+ }
+
+url = 'https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures'
+# Fetch the webpage content
+scraper = cloudscraper.create_scraper()
+
+response = scraper.get(url)
+
+# Parse the HTML content using BeautifulSoup
+soup = BeautifulSoup(response.content, 'html.parser')
+
+# Extract tables (wrapped in StringIO; pandas deprecates passing a literal HTML string)
+tables = pd.read_html(StringIO(response.text))
+soccer_schedule = tables[0]
+
+# --- Clean up Date/Time ---
+# Combine Date + Time
+soccer_schedule["DateTime_UK"] = pd.to_datetime(
+ soccer_schedule["Date"].astype(str) + " " + soccer_schedule["Time"].astype(str),
+ errors="coerce"
+)
+
+# Localize to UK time
+uk = pytz.timezone("Europe/London")
+ny = pytz.timezone("America/New_York")
+soccer_schedule["DateTime_UK"] = soccer_schedule["DateTime_UK"].dt.tz_localize(uk, ambiguous="NaT", nonexistent="NaT")
+
+# Convert to New York time
+soccer_schedule["DateTime_NY"] = soccer_schedule["DateTime_UK"].dt.tz_convert(ny)
+
+# Format for display in am/pm style
+soccer_schedule["NY_Time"] = soccer_schedule["DateTime_NY"].dt.strftime("%I:%M %p")
+
+# Drop leading zeros from hour (optional)
+soccer_schedule["NY_Time"] = soccer_schedule["NY_Time"].str.lstrip("0")
+
+
+# Fix team naming
+soccer_schedule = soccer_schedule.replace("Nott'ham Forest", "Nottingham Forest")
+soccer_schedule = soccer_schedule.replace("Newcastle Utd", "Newcastle United")
+soccer_schedule = soccer_schedule.replace("Wolves", "Wolverhampton Wanderers")
+
+# Filter today's schedule & keep merge_key
+df_today = (
+    soccer_schedule[
+        pd.to_datetime(soccer_schedule.Date, errors="coerce") == pd.to_datetime(date.today())
+    ]
+    .reset_index()
+    .rename({'index': 'merge_key'}, axis='columns')
+)
+
+df_today = df_today[['Date', 'NY_Time', 'Away', 'Home', 'Wk', 'merge_key']]
+
+if len(df_today)>0:
+ # Load XML
+ file_path = "data/epg.xml" # replace with actual path
+ tree = ET.parse(file_path)
+ root = tree.getroot()
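+    # For reference, the programme entries matched below are standard XMLTV, roughly of
+    # this shape (channel id and title are illustrative, not taken from a real feed):
+    #   <programme start="20250823143000 +0000" stop="20250823163000 +0000" channel="NBCSN.us2">
+    #     <title>Premier League: Arsenal vs Leeds United</title>
+    #   </programme>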
+
+ data = []
+
+ # Iterate schedule rows
+ for _, row in df_today.iterrows():
+ home = str(row["Home"]).strip()
+ away = str(row["Away"]).strip()
+
+ for prog in root.findall("programme"):
+ title = prog.find("title").text if prog.find("title") is not None else ""
+ desc = prog.find("desc").text if prog.find("desc") is not None else ""
+
+ # Keep only if Premier League and both team names appear
+ if "Premier League" in title and home in title and away in title:
+ data.append({
+ "merge_key": row["merge_key"], # ✅ carry over merge_key
+ "Schedule_Home": home,
+ "Schedule_Away": away,
+ "Start": prog.attrib.get("start"),
+ "Stop": prog.attrib.get("stop"),
+ "Channel": prog.attrib.get("channel"),
+ "Title": title.strip() })
+
+ # Create DataFrame
+ df_matches = pd.DataFrame(data)
+
+ # Convert start/stop to datetime
+ df_matches["Start"] = pd.to_datetime(df_matches["Start"].str[:14], format="%Y%m%d%H%M%S", errors="coerce", utc=True)
+ df_matches["Stop"] = pd.to_datetime(df_matches["Stop"].str[:14], format="%Y%m%d%H%M%S", errors="coerce", utc=True)
+
+ df = pd.merge(
+ df_today,
+ df_matches[['merge_key','Schedule_Home','Schedule_Away','Channel']].drop_duplicates(),
+ how='left',
+ on='merge_key'
+ )
+
+
+ ####
+
+ for game in df['merge_key'].unique():
+ temp = df[df['merge_key']==game].drop_duplicates().reset_index(drop=True)
+ temp = temp.rename({'NY_Time':'Time'},axis='columns')
+ temp['Wk'] = temp.Wk.astype('int')
+
+ temp2 = temp[['Channel']].drop_duplicates().reset_index(drop=True)
+
+ # Split once on "." into two columns
+ split_cols = temp2['Channel'].str.split('.', n=1, expand=True)
+ temp2['Channel'] = split_cols[0] # part before "."
+ temp2['Country'] = split_cols[1].str.upper().fillna("") # part after ".", or "" if missing
+
+ # Reorder
+ temp2 = temp2[['Country', 'Channel']]
+
+        # Normalize country/channel labels: 'US2' is the same country group as 'US',
+        # and regional NBC feeds (NBC followed by 3-6 capital letters) collapse to 'NBC'
+        temp2['Country'] = temp2['Country'].replace('US2', 'US')
+        pattern = re.compile(r"^NBC[A-Z]{3,6}$")
+        temp2["Channel"] = temp2["Channel"].apply(lambda x: "NBC" if pattern.match(x) else x)
+
+
+ temp2 = temp2.drop_duplicates()
+ ###
+
+ game_table = (
+ GT(temp2.drop_duplicates())
+ .tab_header(
+ title=f"{temp['Away'][0]} @ {temp['Home'][0]}",
+ subtitle=f"Matchweek {temp.Wk[0]} {temp['Time'][0]} {str(temp['Date'][0]).split(' ')[0]}",
+ ).tab_source_note(md(' '))
+ )
+ game_table_image_path = 'data/epl.png'
+ game_table.save(game_table_image_path,window_size=(1000, 1000),web_driver=driver)
+
+ with open(game_table_image_path, 'rb') as image_file:
+ # Prepare the payload with the file
+ payload = {
+ 'file': image_file
+ }
+
+ # Send the POST request to the webhook
+ response = requests.post(epl_sched_url, files=payload)
+
+
+ os.remove(game_table_image_path)
+
+
diff --git a/src/mlb.py b/src/mlb.py
new file mode 100644
index 0000000..4b4b2b7
--- /dev/null
+++ b/src/mlb.py
@@ -0,0 +1,333 @@
+from bs4 import BeautifulSoup
+import pandas as pd
+import re
+import os
+import requests
+import cloudscraper
+from pathlib import Path
+from datetime import datetime, date, time, timedelta, timezone
+from zoneinfo import ZoneInfo  # Python 3.9+
+from lxml import etree
+from great_tables import GT, md
+
+mlb_sched_url = os.getenv("MLB_SCHED_URL")
+driver = 'chrome'
+
+year = date.today().year
+# Request headers for the schedule page
+headers = {
+ 'Access-Control-Allow-Origin': '*',
+ 'Access-Control-Allow-Methods': 'GET',
+ 'Access-Control-Allow-Headers': 'Content-Type',
+ 'Access-Control-Max-Age': '3600',
+ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
+ }
+
+url = f'https://www.baseball-reference.com/leagues/majors/{year}-schedule.shtml'
+# Fetch the webpage content
+scraper = cloudscraper.create_scraper()
+
+response = scraper.get(url)
+
+# Parse the HTML content using BeautifulSoup
+soup = BeautifulSoup(response.content, 'html.parser')
+
+
+today_section = soup.find("span", id="today")
+
+data = []
+if today_section:
+ for game in today_section.find_all_next("p", class_="game"):
+ gm_time = game.find("strong").get_text(strip=True) if game.find("strong") else None
+ teams = game.find_all("a")
+ if len(teams) >= 2:
+ away_team = teams[0].get_text(strip=True)
+ home_team = teams[1].get_text(strip=True)
+ else:
+ away_team = home_team = None
+
+ preview_tag = game.find("em")
+ preview_link = preview_tag.a["href"] if preview_tag and preview_tag.a else None
+
+ # Extract date from preview link if present
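+        # (preview hrefs look like /previews/<year>/<TEAM><YYYYMMDD><game#>.shtml, so
+        #  characters 3-10 of the filename are the YYYYMMDD date parsed below)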
+ game_date = None
+ if preview_link:
+ game_date = pd.to_datetime(preview_link.split('/')[3][3:11])
+
+ data.append({
+ "Time": gm_time,
+ "Away Team": away_team,
+ "Home Team": home_team,
+ "Preview Link": preview_link,
+ "Game Date": game_date
+ })
+
+sched = pd.DataFrame(data)
+sched = sched[sched['Game Date']==str(date.today())]
+
+if len(sched)>0:
+
+ sched['Away Team'] = sched['Away Team'].replace("Arizona D'Backs","Arizona Diamondbacks")
+ sched['Home Team'] = sched['Home Team'].replace("Arizona D'Backs","Arizona Diamondbacks")
+
+
+ ##############
+
+ teams = list(pd.concat([sched['Away Team'], sched['Home Team'] ]))
+
+ ##############
+
+
+
+
+ # --- point this to your file ---
+ xml_path = Path("data/epg.xml")
+
+ # Parse with lxml recovery; wrap to guarantee a single root
+ parser = etree.XMLParser(recover=True, encoding="utf-8")
+ tree = etree.fromstring(b"
within this event's own section only.""" + tv_text = None + for n in section_nodes: + if isinstance(n, Tag) and n.name == "p" and "m-mmaf-pte-event-list__tv-info" in (n.get("class") or []): + tv_text = n.get_text(" ", strip=True).lower() + break + if not tv_text: + return None, None + + main_m = re.search(r"main\s*card[^;]*?\bat\s*" + _time_pat, tv_text) + prelim_m = re.search(r"prelims?[^;]*?\bat\s*" + _time_pat, tv_text) + main_time = normalize_time(main_m.group(1)) if main_m else None + prelim_time = normalize_time(prelim_m.group(1)) if prelim_m else None + return main_time, prelim_time + +# The schedule list wrapper(s) +wrappers = soup.find_all("div", class_="m-mmaf-pte-event-list") +for wrapper in wrappers: + # iterate each event header inside the wrapper + for h2 in wrapper.find_all("h2"): + event_name = h2.get_text(strip=True) + + # the date for this event is the next sibling