#!/usr/bin/env python3
"""
prawicowy-dashboard/server.py

Aggregator server — serves static files + an RSS proxy (no external dependencies).
"""
import json
import os
import re
import socket
import subprocess
import threading
import time
import xml.etree.ElementTree as ET
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

ROOT = os.path.dirname(os.path.abspath(__file__))
CACHE = {}  # url -> (timestamp, data)
CACHE_TTL = 600  # 10 minutes
FETCH_TIMEOUT = 12
FEEDS = {
    # Right-wing
    "republika": [
        "https://dorzeczy.pl/feed",  # backup, because Republika blocks direct fetches
    ],
    "wpolsce": [
        "https://niezalezna.pl/feed",
    ],
    "trwam": [
        "https://www.radiomaryja.pl/feed/",
    ],
    # Opposition
    "opozycja_tvp": [
        "https://tvp.info/feed",
        "https://www.tvp.info/rss",
    ],
    "opozycja_tvn": [
        "https://tvn24.pl/wiadomosci-z-kraju,3.xml",
    ],
    "opozycja_wyborcza": [
        "https://rss.wyborcza.pl/wyborcza.rss",
    ],
}
# Browser-like request headers (the curl invocation below sends an equivalent inline set)
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
    "Accept": "application/rss+xml, application/xml, text/xml, */*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "pl-PL,pl;q=0.9,en;q=0.8",
}

# Feed XML namespaces (the same URIs are also written out inline in parse_rss)
NS = {
    'dc': 'http://purl.org/dc/elements/1.1/',
    'content': 'http://purl.org/rss/1.0/modules/content/',
    'media': 'http://search.yahoo.com/mrss/',
    'atom': 'http://www.w3.org/2005/Atom',
}
def fetch_url(url):
    """Fetch URL with cache — uses a curl subprocess (more reliable TLS)."""
    now = time.time()
    if url in CACHE and now - CACHE[url][0] < CACHE_TTL:
        return CACHE[url][1]
    result = subprocess.run(
        ['curl', '-sL', '--max-time', str(FETCH_TIMEOUT),
         '-A', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
         '-H', 'Accept: application/rss+xml,application/xml,text/xml,*/*',
         '-H', 'Accept-Language: pl-PL,pl;q=0.9',
         '--compressed',
         url],
        capture_output=True, timeout=FETCH_TIMEOUT + 3,
    )
    if result.returncode != 0:
        raise RuntimeError(f"curl error {result.returncode}: {result.stderr.decode()[:100]}")
    data = result.stdout
    if not data:
        raise RuntimeError("empty response")
    CACHE[url] = (now, data)
    return data
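
# A hedged usage sketch of the cache above: a second call within CACHE_TTL
# returns the stored bytes without spawning curl again. The URL is one of the
# configured feeds, used purely for illustration.
#
#   data1 = fetch_url('https://dorzeczy.pl/feed')   # spawns curl, fills CACHE
#   data2 = fetch_url('https://dorzeczy.pl/feed')   # within 10 min: CACHE hit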

def parse_rss(xml_bytes):
    """Parse RSS/Atom XML into a list of items."""
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError:
        # Try stripping BOM/prolog
        text = xml_bytes.decode('utf-8', errors='replace').lstrip('\ufeff')
        root = ET.fromstring(text)
    items = []
    tag = root.tag.lower()
    if 'rss' in tag or root.find('channel') is not None:
        # RSS 2.0
        for item in root.iter('item'):
            title = _t(item, 'title')
            link = _t(item, 'link') or _t(item, '{http://www.w3.org/2005/Atom}link')
            desc = _t(item, 'description') or _t(item, '{http://purl.org/rss/1.0/modules/content/}encoded')
            date = _t(item, 'pubDate') or _t(item, '{http://purl.org/dc/elements/1.1/}date')
            thumb = ''
            # media:thumbnail or enclosure
            mt = item.find('{http://search.yahoo.com/mrss/}thumbnail')
            if mt is not None:
                thumb = mt.get('url', '')
            enc = item.find('enclosure')
            if not thumb and enc is not None and 'image' in (enc.get('type') or ''):
                thumb = enc.get('url', '')
            if not thumb:
                thumb = extract_img(desc)
            items.append({
                'title': strip_tags(title),
                'link': link or '',
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': thumb or '',
            })
    elif 'feed' in tag:
        # Atom
        for entry in root.iter('{http://www.w3.org/2005/Atom}entry'):
            title = _t(entry, '{http://www.w3.org/2005/Atom}title')
            link_el = entry.find('{http://www.w3.org/2005/Atom}link[@rel="alternate"]')
            if link_el is None:
                link_el = entry.find('{http://www.w3.org/2005/Atom}link')
            link = link_el.get('href', '') if link_el is not None else ''
            desc = _t(entry, '{http://www.w3.org/2005/Atom}summary') or \
                   _t(entry, '{http://www.w3.org/2005/Atom}content')
            date = _t(entry, '{http://www.w3.org/2005/Atom}updated') or \
                   _t(entry, '{http://www.w3.org/2005/Atom}published')
            items.append({
                'title': strip_tags(title),
                'link': link,
                'summary': strip_tags(desc)[:300],
                'date': date or '',
                'thumb': extract_img(desc),
            })
    return items[:30]

def _t(el, tag):
    """Text of the first child matching `tag`, or '' if absent/empty."""
    found = el.find(tag)
    return (found.text or '').strip() if found is not None and found.text else ''


def strip_tags(html):
    """Drop HTML tags and unescape the most common entities."""
    if not html:
        return ''
    return (re.sub(r'<[^>]+>', '', html)
            .replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
            .replace('&nbsp;', ' ').replace('&#39;', "'")
            .strip())


def extract_img(html):
    """Return the src of the first <img> tag, if any."""
    if not html:
        return ''
    m = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html, re.I)
    return m.group(1) if m else ''

def fetch_group(group_id):
    """Fetch all feeds for a group, return the first one that works."""
    urls = FEEDS.get(group_id, [])
    for url in urls:
        try:
            data = fetch_url(url)
            items = parse_rss(data)
            if items:
                return {'status': 'ok', 'source': group_id, 'items': items}
        except Exception as e:
            print(f"[{group_id}] {url}: {e}")
    return {'status': 'error', 'source': group_id, 'items': []}
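
# Shape of the envelope returned above (field values are illustrative, derived
# from the dicts parse_rss builds):
#
#   {'status': 'ok', 'source': 'trwam',
#    'items': [{'title': '...', 'link': '...', 'summary': '...',
#               'date': '...', 'thumb': '...'}, ...]}
#
# or, when every URL in the group failed:
#
#   {'status': 'error', 'source': 'trwam', 'items': []}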

class Handler(SimpleHTTPRequestHandler):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=ROOT, **kwargs)

    def log_message(self, fmt, *args):
        pass  # Suppress access log spam
    def do_GET(self):
        parsed = urlparse(self.path)

        # ── /api/feeds?groups=republika,wpolsce,... ──
        if parsed.path == '/api/feeds':
            qs = parse_qs(parsed.query)
            groups = qs.get('groups', [','.join(FEEDS.keys())])[0].split(',')
            groups = [g.strip() for g in groups if g.strip() in FEEDS]
            results = {}
            threads = []
            lock = threading.Lock()

            def worker(gid):
                r = fetch_group(gid)
                with lock:
                    results[gid] = r

            for gid in groups:
                t = threading.Thread(target=worker, args=(gid,))
                t.start()
                threads.append(t)
            for t in threads:
                t.join(timeout=15)
            body = json.dumps(results, ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return
        # ── /api/feed?url=... (single feed proxy) ──
        if parsed.path == '/api/feed':
            qs = parse_qs(parsed.query)
            url = qs.get('url', [''])[0]
            if not url:
                self.send_error(400, 'Missing url')
                return
            try:
                data = fetch_url(url)
                items = parse_rss(data)
                body = json.dumps({'status': 'ok', 'items': items}, ensure_ascii=False).encode('utf-8')
            except Exception as e:
                body = json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return
        # ── /api/radio/maryja — stream proxy (upstream: http://51.68.135.155:80/stream) ──
        if parsed.path == '/api/radio/maryja':
            try:
                # Open a raw TCP connection: Shoutcast servers may answer with an
                # "ICY 200 OK" status line that urllib would reject as invalid HTTP.
                host, port = '51.68.135.155', 80
                sock = socket.create_connection((host, port), timeout=10)
                req = (
                    'GET /stream HTTP/1.0\r\n'
                    'Host: 51.68.135.155\r\n'
                    'User-Agent: Mozilla/5.0\r\n'
                    'Icy-MetaData: 0\r\n'
                    '\r\n'
                )
                sock.sendall(req.encode())
                # Read until the end of the upstream HTTP/ICY headers
                buf = b''
                while b'\r\n\r\n' not in buf:
                    chunk = sock.recv(1024)
                    if not chunk:
                        break
                    buf += chunk
                # Send our own clean headers to the browser. No Content-Length and
                # no Transfer-Encoding: the body is not chunk-framed, so the stream
                # simply ends when the connection closes.
                self.send_response(200)
                self.send_header('Content-Type', 'audio/aac')
                self.send_header('Cache-Control', 'no-cache')
                self.send_header('Access-Control-Allow-Origin', '*')
                self.end_headers()
                # Stream the body: first any audio bytes read past the headers...
                leftover = buf.split(b'\r\n\r\n', 1)
                if len(leftover) > 1 and leftover[1]:
                    self.wfile.write(leftover[1])
                # ...then relay the rest until the upstream closes
                while True:
                    chunk = sock.recv(4096)
                    if not chunk:
                        break
                    self.wfile.write(chunk)
                sock.close()
            except Exception as e:
                print(f'[radio proxy] {e}')
            return
        # ── static files ──
        super().do_GET()

if __name__ == '__main__':
    os.chdir(ROOT)
    # ThreadingHTTPServer: the radio proxy holds its connection open for as long
    # as the client listens, which would stall a single-threaded HTTPServer.
    server = ThreadingHTTPServer(('0.0.0.0', 1234), Handler)
    print(f"Server running at http://0.0.0.0:1234 (directory: {ROOT})")
    server.serve_forever()
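
# Quick smoke test once the server is running (a sketch; localhost and the
# default port 1234 assumed):
#
#   curl 'http://localhost:1234/api/feeds?groups=republika,opozycja_tvn'
#   curl 'http://localhost:1234/api/feed?url=https://www.radiomaryja.pl/feed/'
#   # and in the dashboard page: <audio src="/api/radio/maryja" controls>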