#!/usr/bin/env python3
"""
Aggregator server: serves static files + an RSS proxy.
Stdlib only (no external Python packages); feed fetching shells out to curl.
"""
import gzip
import io
import json
import os
import re
import socket
import ssl
import subprocess
import threading
import time
import urllib.request
import xml.etree.ElementTree as ET
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

ROOT = os.path.dirname(os.path.abspath(__file__))
CACHE = {}          # url -> (timestamp, data)
CACHE_TTL = 600     # 10 minutes
FETCH_TIMEOUT = 12
CTX = ssl.create_default_context()

FEEDS = {
    # Right-wing
    "republika": [
        "https://dorzeczy.pl/feed",  # backup, because Republika blocks direct fetches
    ],
    "wpolsce": [
        "https://niezalezna.pl/feed",
    ],
    "trwam": [
        "https://www.radiomaryja.pl/feed/",
    ],
    # Opposition
    "opozycja_tvp": [
        "https://tvp.info/feed",
        "https://www.tvp.info/rss",
    ],
    "opozycja_tvn": [
        "https://tvn24.pl/wiadomosci-z-kraju,3.xml",
    ],
    "opozycja_wyborcza": [
        "https://rss.wyborcza.pl/wyborcza.rss",
    ],
}

# Browser-like request headers (used by the urllib fallback sketch below).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
    "Accept": "application/rss+xml, application/xml, text/xml, */*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "pl-PL,pl;q=0.9,en;q=0.8",
}

NS = {
    'dc': 'http://purl.org/dc/elements/1.1/',
    'content': 'http://purl.org/rss/1.0/modules/content/',
    'media': 'http://search.yahoo.com/mrss/',
    'atom': 'http://www.w3.org/2005/Atom',
}


def fetch_url(url):
    """Fetch a URL with caching; uses a curl subprocess (more reliable TLS)."""
    now = time.time()
    if url in CACHE and now - CACHE[url][0] < CACHE_TTL:
        return CACHE[url][1]
    result = subprocess.run(
        ['curl', '-sL', '--max-time', str(FETCH_TIMEOUT),
         '-A', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
         '-H', 'Accept: application/rss+xml,application/xml,text/xml,*/*',
         '-H', 'Accept-Language: pl-PL,pl;q=0.9',
         '--compressed', url],
        capture_output=True, timeout=FETCH_TIMEOUT + 3
    )
    if result.returncode != 0:
        raise RuntimeError(f"curl error {result.returncode}: {result.stderr.decode()[:100]}")
    data = result.stdout
    if not data:
        raise RuntimeError("empty response")
    CACHE[url] = (now, data)
    return data
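
# A minimal pure-stdlib fallback fetcher, in case curl is not installed.
# NOT used by default: fetch_url() shells out to curl precisely because some
# of these sites reject plain urllib TLS/headers. This sketch reuses the
# HEADERS and CTX defined above; swap it in for the subprocess.run call in
# fetch_url() if needed (it only decompresses gzip, not deflate).
def fetch_url_stdlib(url):
    req = urllib.request.Request(url, headers=HEADERS)
    with urllib.request.urlopen(req, timeout=FETCH_TIMEOUT, context=CTX) as resp:
        data = resp.read()
        # urllib does not transparently decompress; undo gzip ourselves
        if resp.headers.get('Content-Encoding') == 'gzip':
            data = gzip.GzipFile(fileobj=io.BytesIO(data)).read()
    return data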

def parse_rss(xml_bytes):
    """Parse RSS/Atom XML into a list of item dicts."""
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError:
        # Retry after stripping a BOM / stray text before the prolog
        text = xml_bytes.decode('utf-8', errors='replace').lstrip('\ufeff')
        root = ET.fromstring(text)
    items = []
    tag = root.tag.lower()
    if 'rss' in tag or root.find('channel') is not None:
        # RSS 2.0
        for item in root.iter('item'):
            title = _t(item, 'title')
            link = _t(item, 'link') or _t(item, 'atom:link', NS)
            desc = _t(item, 'description') or _t(item, 'content:encoded', NS)
            date = _t(item, 'pubDate') or _t(item, 'dc:date', NS)
            thumb = ''
            # media:thumbnail, or an image enclosure, or the first <img> in the body
            mt = item.find('media:thumbnail', NS)
            if mt is not None:
                thumb = mt.get('url', '')
            enc = item.find('enclosure')
            if not thumb and enc is not None and 'image' in (enc.get('type') or ''):
                thumb = enc.get('url', '')
            if not thumb:
                thumb = extract_img(desc)
            items.append({
                'title': strip_tags(title),
                'link': link or '',
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': thumb or '',
            })
    elif 'feed' in tag:
        # Atom
        for entry in root.findall('.//atom:entry', NS):
            title = _t(entry, 'atom:title', NS)
            link_el = entry.find("atom:link[@rel='alternate']", NS)
            if link_el is None:
                link_el = entry.find('atom:link', NS)
            link = link_el.get('href', '') if link_el is not None else ''
            desc = _t(entry, 'atom:summary', NS) or _t(entry, 'atom:content', NS)
            date = _t(entry, 'atom:updated', NS) or _t(entry, 'atom:published', NS)
            items.append({
                'title': strip_tags(title),
                'link': link,
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': extract_img(desc),
            })
    return items[:30]


def _t(el, tag, ns=None):
    """Text of the first matching child element, stripped, or ''."""
    found = el.find(tag, ns)
    return (found.text or '').strip() if found is not None and found.text else ''


def strip_tags(html):
    """Remove HTML tags and decode the most common entities."""
    if not html:
        return ''
    text = re.sub(r'<[^>]+>', '', html)
    for entity, char in (('&amp;', '&'), ('&lt;', '<'), ('&gt;', '>'),
                         ('&nbsp;', ' '), ('&#039;', "'")):
        text = text.replace(entity, char)
    return text.strip()


def extract_img(html):
    """Return the first <img src=...> URL in an HTML fragment, or ''."""
    if not html:
        return ''
    m = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html, re.I)
    return m.group(1) if m else ''
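
# Illustration of what parse_rss() returns (made-up minimal feed, not a real
# source; the dict shape matches the items.append calls above):
#
#   >>> parse_rss(b"<rss><channel><item><title>Hello</title>"
#   ...           b"<link>https://example.pl/a</link></item></channel></rss>")
#   [{'title': 'Hello', 'link': 'https://example.pl/a', 'summary': '',
#     'date': '', 'thumb': ''}]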

def fetch_group(group_id):
    """Fetch all feeds for a group; return the first one that works."""
    urls = FEEDS.get(group_id, [])
    for url in urls:
        try:
            data = fetch_url(url)
            items = parse_rss(data)
            if items:
                return {'status': 'ok', 'source': group_id, 'items': items}
        except Exception as e:
            print(f"[{group_id}] {url}: {e}")
    return {'status': 'error', 'source': group_id, 'items': []}


class Handler(SimpleHTTPRequestHandler):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=ROOT, **kwargs)

    def log_message(self, fmt, *args):
        pass  # suppress access-log spam

    def do_GET(self):
        parsed = urlparse(self.path)

        # ── /api/feeds?groups=republika,wpolsce,... ──
        if parsed.path == '/api/feeds':
            qs = parse_qs(parsed.query)
            groups = qs.get('groups', [','.join(FEEDS.keys())])[0].split(',')
            groups = [g.strip() for g in groups if g.strip() in FEEDS]
            results = {}
            threads = []
            lock = threading.Lock()

            # Fetch each group in its own thread so slow feeds don't serialize
            def worker(gid):
                r = fetch_group(gid)
                with lock:
                    results[gid] = r

            for gid in groups:
                t = threading.Thread(target=worker, args=(gid,))
                t.start()
                threads.append(t)
            for t in threads:
                t.join(timeout=15)

            body = json.dumps(results, ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return

        # ── /api/feed?url=... (single-feed proxy) ──
        if parsed.path == '/api/feed':
            qs = parse_qs(parsed.query)
            url = qs.get('url', [''])[0]
            if not url:
                self.send_error(400, 'Missing url')
                return
            try:
                data = fetch_url(url)
                items = parse_rss(data)
                body = json.dumps({'status': 'ok', 'items': items},
                                  ensure_ascii=False).encode('utf-8')
            except Exception as e:
                body = json.dumps({'status': 'error', 'message': str(e)},
                                  ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return

        # ── /api/radio/maryja (stream proxy) ──
        if parsed.path == '/api/radio/maryja':
            try:
                # Open a raw TCP connection to the Shoutcast server
                # (http://51.68.135.155:80/stream) and speak HTTP/1.0 ourselves.
                host, port = '51.68.135.155', 80
                sock = socket.create_connection((host, port), timeout=10)
                req = (
                    'GET /stream HTTP/1.0\r\n'
                    'Host: 51.68.135.155\r\n'
                    'User-Agent: Mozilla/5.0\r\n'
                    'Icy-MetaData: 0\r\n'
                    '\r\n'
                )
                sock.sendall(req.encode())
                # Read until the end of the upstream HTTP headers
                buf = b''
                while b'\r\n\r\n' not in buf:
                    chunk = sock.recv(1024)
                    if not chunk:
                        break
                    buf += chunk
                # Send our own clean headers to the browser. No Content-Length
                # and no Transfer-Encoding: we never chunk-frame the body, so
                # the response is delimited by closing the connection.
                self.send_response(200)
                self.send_header('Content-Type', 'audio/aac')
                self.send_header('Cache-Control', 'no-cache')
                self.send_header('Access-Control-Allow-Origin', '*')
                self.end_headers()
                # Stream the body: first any bytes already read past the
                # headers, then everything else as it arrives.
                leftover = buf.split(b'\r\n\r\n', 1)
                if len(leftover) > 1 and leftover[1]:
                    self.wfile.write(leftover[1])
                while True:
                    chunk = sock.recv(4096)
                    if not chunk:
                        break
                    self.wfile.write(chunk)
                sock.close()
            except Exception as e:
                print(f'[radio proxy] {e}')
            return

        # ── static files ──
        super().do_GET()


if __name__ == '__main__':
    os.chdir(ROOT)
    # ThreadingHTTPServer: the radio proxy holds its connection open for as
    # long as the stream plays, so a single-threaded HTTPServer would block
    # every other request.
    server = ThreadingHTTPServer(('0.0.0.0', 1234), Handler)
    print(f"Server running at http://0.0.0.0:1234 (directory: {ROOT})")
    server.serve_forever()
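
# ── Usage examples ──
# Endpoints, port and group ids come from this file; the script filename is
# assumed. Run locally, then:
#
#   python3 server.py
#   curl 'http://localhost:1234/api/feeds'                              # all groups
#   curl 'http://localhost:1234/api/feeds?groups=republika,wpolsce'     # selected groups
#   curl 'http://localhost:1234/api/feed?url=https://dorzeczy.pl/feed'  # single-feed proxy
#   curl 'http://localhost:1234/api/radio/maryja' -o stream.aac         # radio stream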