# config.py: environment-driven settings and the default RSS feed list.
import os
# Deployment settings, each read once from the process environment at import
# time. String settings fall back to the literal shown when the variable is
# unset; the two keys fall back to None.
RSS_ENV = os.getenv("RSS_ENV", "dev")
RSS_POD_URL = os.getenv("RSS_POD_URL", "https://dev.backend.memri.io")
RSS_OWNER_KEY = os.getenv("RSS_OWNER_KEY")
RSS_DATABASE_KEY = os.getenv("RSS_DATABASE_KEY")

# True whenever the variable is present at all -- its value is not inspected,
# so even an empty string or "0" switches the flag on.
RSS_SETUP_ON_START = "RSS_SETUP_ON_START" in os.environ

# Numeric settings: the environment supplies text, so convert to int here.
RSS_MAX_ENTRIES_ON_START = int(os.getenv("RSS_MAX_ENTRIES_ON_START", 100))
RSS_DEFAULT_DELTA_DAYS = int(os.getenv("RSS_DEFAULT_DELTA_DAYS", 1))
# Fixed length limits for summarization input and output.
# NOTE(review): the unit (characters, words or tokens) is not visible in this
# file -- confirm against the code that consumes these values.
MIN_INPUT_LENGTH = 200
SUMMARY_MIN_LENGTH = 50
SUMMARY_MAX_LENGTH = 120

# Base URLs of the external services. Each one can be overridden through the
# environment variable of the same name; otherwise the default below is used.
POSTLIGHT_API_URL = os.environ.get(
    "POSTLIGHT_API_URL", "https://postlight-parser-api-7n2iai4dea-nw.a.run.app"
)

SUMMARY_SOURCE = "memri"

SEMANTIC_SEARCH_URL = os.environ.get(
    "SEMANTIC_SEARCH_URL", "https://semantic-search.dev.backend.memri.io"
)
# Unlike the other defaults, this one ends with a trailing slash.
SUMMARIZATION_URL = os.environ.get(
    "SUMMARIZATION_URL", "https://summarization.dev.backend.memri.io/"
)
MEMRI_BOT_URL = os.environ.get("MEMRI_BOT_URL", "https://chatbot.dev.backend.memri.io")
# Built-in feed list. Each entry is a (feed URL, source name) tuple.
# Commented-out entries are feeds that were disabled; the trailing note on
# each one records the reason given for disabling it.
DEFAULT_FEEDS = [
    ("https://www.theguardian.com/international/rss", "The Guardian"),
    ("https://tinybuddha.com/emailrss", "Tiny Buddha"),  # 10 entries
    ("https://www.livescience.com/feeds/all", "Live Science"),
    ("https://www.wellandgood.com/feed/", "Well+Good"),
    ("https://www.espn.com/espn/rss/news", "ESPN"),  # Top headlines
    ("https://www.mindful.org/feed/", "Mindful.org"),
    ("https://www.space.com/feeds/all", "Space"),
    ("https://www.insider.co.uk/?service=rss", "Insider.co.uk"),
    ("https://www.calmsage.com/feed/", "Calm Sage"),  # 10 entries
    ("http://rss.cnn.com/rss/edition.rss", "CNN"),  # Top stories
    (
        "http://feeds.bbci.co.uk/news/rss.xml",
        "BBC News",
    ),
    (
        "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
        "NY Times",
    ),
    (
        "https://www.reutersagency.com/feed/?taxonomy=best-topics&post_type=best",
        "Reuters",
    ),
    ("https://decider.com/feed/", "Decider"),  # 10 entries
    ("https://rss.punchng.com/v1/category/latest_news", "PUNCH"),  # 30 entries
    ("https://www.dezeen.com/feed/", "Dezeen"),  # 50 entries
    ("https://www.avclub.com/rss", "The AV Club"),
    ("https://kotaku.com/rss", "Kotaku"),
    ("https://gizmodo.com/rss", "Gizmodo"),
    ("https://feeds.feedburner.com/yankodesign", "Yanko Design"),
    ("https://feeds.feedburner.com/ign/all", "IGN"),
    ("https://feeds.macrumors.com/MacRumors-All", "MacRumors"),
    ("https://phys.org/rss-feed/", "Phys.org"),
    ("https://bgr.com/news/feed/", "BGR"),
    (
        "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114",
        "CNBCE",
    ),  # Top news
    # ("https://www.buzzfeed.com/index.xml", "BuzzFeed"),  # Contains :/// notation
    ("https://techcrunch.com/feed/", "TechCrunch"),
    ("https://thenextweb.com/feed", "The Next Web"),
    ("https://venturebeat.com/feed/", "VentureBeat"),
    ("https://www.wired.com/feed/rss", "Wired"),
    ("https://www.theverge.com/rss/index.xml", "The Verge"),
    # ("https://www.reddit.com/top.rss?t=day", "Reddit"),
    # ("https://thehill.com/homenews/feed/", "The Hill"),  # Requires user-agent string in headers
    # ("https://mindfulminutes.com/feed/", "Mindful Minutes"),  # 520 status code, possibly requires headers
    # ("https://api.axios.com/feed/", "Axios"),  # Requires JS and cookies
    # ("https://hackernoon.com/feed", "HackerNoon"),  # corrupt rss
    # ("https://www.androidpolice.com/feed/", "AndroidPolice"),  # parser-api returns ''
    ("https://www.entrepreneur.com/latest.rss", "Entrepreneur"),
    ("https://seths.blog/feed/", "Seth's Blog"),
    ("https://feeds.feedburner.com/CalculatedRisk", "Calculated Risk"),
    ("https://feeds.feedburner.com/StrategyBusiness-AllUpdates", "Strategy Business"),
    ("https://www.whowhatwear.co.uk/rss", "Who What Wear"),
    ("https://scitechdaily.com/feed/", "SciTechDaily"),
    ("https://uproxx.com/feed/", "UPROXX"),
    ("https://steveblank.com/feed/", "Steve Blank"),
    ("https://financialpost.com/feed", "Financial Post"),
    ("https://cms.qz.com/feed/", "Quartz"),
    # ("https://www.inc.com/rss/", "Inc.com"),  # blocked by fw, requires browser-like behavior
    ("https://feeds.feedburner.com/fastcompany/headlines", "Fast Company"),
    ("https://sloanreview.mit.edu/feed/", "MIT Sloan Management"),
    ("https://moxie.foxbusiness.com/google-publisher/latest.xml", "Fox Business"),
    (
        "https://www.businessoffashion.com/arc/outboundfeeds/rss/?outputType=xml",
        "Business of Fashion",
    ),
    # ("https://www.economist.com/the-world-this-week/rss.xml
    # feedparser.bozoexception: Could not parse feed h: <unknown>:2:0: syntax error
    # ("https://www.mckinsey.com/insights/rss", "McKinsey")  # document declared as us-ascii, but parsed as utf-8
    ("https://smallbiztrends.com/feed", "Small Business Trends"),
]