#!/usr/bin/env python3
"""
prawicowy-dashboard/server.py

Aggregator server — serves static files + an RSS proxy (no external dependencies).
"""
import json
import os
import re
import socket
import subprocess
import threading
import time
import xml.etree.ElementTree as ET
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

ROOT = os.path.dirname(os.path.abspath(__file__))
CACHE = {}  # url -> (timestamp, data)
CACHE_TTL = 600  # 10 minutes
FETCH_TIMEOUT = 12
FEEDS = {
    # Right-wing
    "republika": [
        "https://dorzeczy.pl/feed",  # backup, because Republika blocks direct fetches
    ],
    "wpolsce": [
        "https://niezalezna.pl/feed",
    ],
    "trwam": [
        "https://www.radiomaryja.pl/feed/",
    ],
    # Opposition
    "opozycja_tvp": [
        "https://tvp.info/feed",
        "https://www.tvp.info/rss",
    ],
    "opozycja_tvn": [
        "https://tvn24.pl/wiadomosci-z-kraju,3.xml",
    ],
    "opozycja_wyborcza": [
        "https://rss.wyborcza.pl/wyborcza.rss",
    ],
}
# Browser-like request headers (the curl invocation below sends an equivalent inline set)
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
    "Accept": "application/rss+xml, application/xml, text/xml, */*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "pl-PL,pl;q=0.9,en;q=0.8",
}

# Feed XML namespaces (the same URIs are also written out inline in parse_rss)
NS = {
    'dc': 'http://purl.org/dc/elements/1.1/',
    'content': 'http://purl.org/rss/1.0/modules/content/',
    'media': 'http://search.yahoo.com/mrss/',
    'atom': 'http://www.w3.org/2005/Atom',
}
def fetch_url(url):
    """Fetch URL with cache — uses a curl subprocess (more reliable TLS)."""
    now = time.time()
    if url in CACHE and now - CACHE[url][0] < CACHE_TTL:
        return CACHE[url][1]
    result = subprocess.run(
        ['curl', '-sL', '--max-time', str(FETCH_TIMEOUT),
         '-A', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
         '-H', 'Accept: application/rss+xml,application/xml,text/xml,*/*',
         '-H', 'Accept-Language: pl-PL,pl;q=0.9',
         '--compressed',
         url],
        capture_output=True, timeout=FETCH_TIMEOUT + 3,
    )
    if result.returncode != 0:
        raise RuntimeError(f"curl error {result.returncode}: {result.stderr.decode()[:100]}")
    data = result.stdout
    if not data:
        raise RuntimeError("empty response")
    CACHE[url] = (now, data)
    return data
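
# A hedged usage sketch of the cache above: a second call within CACHE_TTL
# returns the stored bytes without spawning curl again. The URL is one of the
# configured feeds, used purely for illustration.
#
#   data1 = fetch_url('https://dorzeczy.pl/feed')   # spawns curl, fills CACHE
#   data2 = fetch_url('https://dorzeczy.pl/feed')   # within 10 min: CACHE hit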

def parse_rss(xml_bytes):
    """Parse RSS/Atom XML into a list of items."""
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError:
        # Try stripping BOM/prolog
        text = xml_bytes.decode('utf-8', errors='replace').lstrip('\ufeff')
        root = ET.fromstring(text)
    items = []
    tag = root.tag.lower()
    if 'rss' in tag or root.find('channel') is not None:
        # RSS 2.0
        for item in root.iter('item'):
            title = _t(item, 'title')
            link = _t(item, 'link') or _t(item, '{http://www.w3.org/2005/Atom}link')
            desc = _t(item, 'description') or _t(item, '{http://purl.org/rss/1.0/modules/content/}encoded')
            date = _t(item, 'pubDate') or _t(item, '{http://purl.org/dc/elements/1.1/}date')
            thumb = ''
            # media:thumbnail or enclosure
            mt = item.find('{http://search.yahoo.com/mrss/}thumbnail')
            if mt is not None:
                thumb = mt.get('url', '')
            enc = item.find('enclosure')
            if not thumb and enc is not None and 'image' in (enc.get('type') or ''):
                thumb = enc.get('url', '')
            if not thumb:
                thumb = extract_img(desc)
            items.append({
                'title': strip_tags(title),
                'link': link or '',
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': thumb or '',
            })
    elif 'feed' in tag:
        # Atom
        for entry in root.iter('{http://www.w3.org/2005/Atom}entry'):
            title = _t(entry, '{http://www.w3.org/2005/Atom}title')
            link_el = entry.find('{http://www.w3.org/2005/Atom}link[@rel="alternate"]')
            if link_el is None:
                link_el = entry.find('{http://www.w3.org/2005/Atom}link')
            link = link_el.get('href', '') if link_el is not None else ''
            desc = _t(entry, '{http://www.w3.org/2005/Atom}summary') or \
                   _t(entry, '{http://www.w3.org/2005/Atom}content')
            date = _t(entry, '{http://www.w3.org/2005/Atom}updated') or \
                   _t(entry, '{http://www.w3.org/2005/Atom}published')
            items.append({
                'title': strip_tags(title),
                'link': link,
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': extract_img(desc),
            })
    return items[:30]

def _t(el, tag):
    """Text of the first child matching `tag`, or '' if absent/empty."""
    found = el.find(tag)
    return (found.text or '').strip() if found is not None and found.text else ''


def strip_tags(html):
    """Drop HTML tags and unescape the most common entities."""
    if not html:
        return ''
    return (re.sub(r'<[^>]+>', '', html)
            .replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
            .replace('&nbsp;', ' ').replace('&#39;', "'")
            .strip())


def extract_img(html):
    """Return the src of the first <img> tag, if any."""
    if not html:
        return ''
    m = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html, re.I)
    return m.group(1) if m else ''

def fetch_group(group_id):
    """Fetch all feeds for a group, return the first one that works."""
    urls = FEEDS.get(group_id, [])
    for url in urls:
        try:
            data = fetch_url(url)
            items = parse_rss(data)
            if items:
                return {'status': 'ok', 'source': group_id, 'items': items}
        except Exception as e:
            print(f"[{group_id}] {url}: {e}")
    return {'status': 'error', 'source': group_id, 'items': []}
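
# Shape of the envelope returned above (field values are illustrative, derived
# from the dicts parse_rss builds):
#
#   {'status': 'ok', 'source': 'trwam',
#    'items': [{'title': '...', 'link': '...', 'summary': '...',
#               'date': '...', 'thumb': '...'}, ...]}
#
# or, when every URL in the group failed:
#
#   {'status': 'error', 'source': 'trwam', 'items': []}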

class Handler(SimpleHTTPRequestHandler):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=ROOT, **kwargs)

    def log_message(self, fmt, *args):
        pass  # Suppress access log spam
    def do_GET(self):
        parsed = urlparse(self.path)

        # ── /api/feeds?groups=republika,wpolsce,... ──
        if parsed.path == '/api/feeds':
            qs = parse_qs(parsed.query)
            groups = qs.get('groups', [','.join(FEEDS.keys())])[0].split(',')
            groups = [g.strip() for g in groups if g.strip() in FEEDS]
            results = {}
            threads = []
            lock = threading.Lock()

            def worker(gid):
                r = fetch_group(gid)
                with lock:
                    results[gid] = r

            for gid in groups:
                t = threading.Thread(target=worker, args=(gid,))
                t.start()
                threads.append(t)
            for t in threads:
                t.join(timeout=15)
            body = json.dumps(results, ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return
        # ── /api/feed?url=... (single feed proxy) ──
        if parsed.path == '/api/feed':
            qs = parse_qs(parsed.query)
            url = qs.get('url', [''])[0]
            if not url:
                self.send_error(400, 'Missing url')
                return
            try:
                data = fetch_url(url)
                items = parse_rss(data)
                body = json.dumps({'status': 'ok', 'items': items}, ensure_ascii=False).encode('utf-8')
            except Exception as e:
                body = json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return
        # ── /api/radio/maryja — stream proxy (upstream: http://51.68.135.155:80/stream) ──
        if parsed.path == '/api/radio/maryja':
            try:
                # Open a raw TCP connection: Shoutcast servers may answer with an
                # "ICY 200 OK" status line that urllib would reject as invalid HTTP.
                host, port = '51.68.135.155', 80
                sock = socket.create_connection((host, port), timeout=10)
                req = (
                    'GET /stream HTTP/1.0\r\n'
                    'Host: 51.68.135.155\r\n'
                    'User-Agent: Mozilla/5.0\r\n'
                    'Icy-MetaData: 0\r\n'
                    '\r\n'
                )
                sock.sendall(req.encode())
                # Read until the end of the upstream HTTP/ICY headers
                buf = b''
                while b'\r\n\r\n' not in buf:
                    chunk = sock.recv(1024)
                    if not chunk:
                        break
                    buf += chunk
                # Send our own clean headers to the browser. No Content-Length and
                # no Transfer-Encoding: the body is not chunk-framed, so the stream
                # simply ends when the connection closes.
                self.send_response(200)
                self.send_header('Content-Type', 'audio/aac')
                self.send_header('Cache-Control', 'no-cache')
                self.send_header('Access-Control-Allow-Origin', '*')
                self.end_headers()
                # Stream the body: first any audio bytes read past the headers...
                leftover = buf.split(b'\r\n\r\n', 1)
                if len(leftover) > 1 and leftover[1]:
                    self.wfile.write(leftover[1])
                # ...then relay the rest until the upstream closes
                while True:
                    chunk = sock.recv(4096)
                    if not chunk:
                        break
                    self.wfile.write(chunk)
                sock.close()
            except Exception as e:
                print(f'[radio proxy] {e}')
            return
        # ── static files ──
        super().do_GET()

if __name__ == '__main__':
    os.chdir(ROOT)
    # ThreadingHTTPServer: the radio proxy holds its connection open for as long
    # as the client listens, which would stall a single-threaded HTTPServer.
    server = ThreadingHTTPServer(('0.0.0.0', 1234), Handler)
    print(f"Server running at http://0.0.0.0:1234 (directory: {ROOT})")
    server.serve_forever()
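
# Quick smoke test once the server is running (a sketch; localhost and the
# default port 1234 assumed):
#
#   curl 'http://localhost:1234/api/feeds?groups=republika,opozycja_tvn'
#   curl 'http://localhost:1234/api/feed?url=https://www.radiomaryja.pl/feed/'
#   # and in the dashboard page: <audio src="/api/radio/maryja" controls>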