# Authored by Alp Deniz Ogut (commit f22c9933)
import os
# Runtime configuration. Every value is read from the process environment
# once, at import time; the second argument of each lookup is the default
# used when the variable is unset.

# Deployment environment name.
RSS_ENV = os.environ.get("RSS_ENV", "dev")
# Base URL of the pod backend.
RSS_POD_URL = os.environ.get("RSS_POD_URL", "https://dev.backend.memri.io")
# Pod credentials; both are None when the variable is not set.
RSS_OWNER_KEY = os.environ.get("RSS_OWNER_KEY", None)
RSS_DATABASE_KEY = os.environ.get("RSS_DATABASE_KEY", None)

# Boolean flag. Previously the mere presence of the variable enabled it, so
# `RSS_SETUP_ON_START=false` (or `=0`) still switched setup on. Explicit
# negative values now disable the flag; any other value -- including the
# empty string -- keeps enabling it, as before.
_DISABLED_FLAG_VALUES = frozenset({"0", "false", "no", "off"})
_setup_on_start = os.environ.get("RSS_SETUP_ON_START")
RSS_SETUP_ON_START = (
    _setup_on_start is not None
    and _setup_on_start.strip().lower() not in _DISABLED_FLAG_VALUES
)

# Integer settings; a non-numeric value raises ValueError at import time.
RSS_MAX_ENTRIES_ON_START = int(os.environ.get("RSS_MAX_ENTRIES_ON_START", 100))
RSS_DEFAULT_DELTA_DAYS = int(os.environ.get("RSS_DEFAULT_DELTA_DAYS", 1))

# Fixed length limits for summarization input and output.
# NOTE(review): the unit (characters, words or tokens) is not visible in
# this file -- confirm against the code that consumes these values.
MIN_INPUT_LENGTH = 200
SUMMARY_MIN_LENGTH = 50
SUMMARY_MAX_LENGTH = 120

# Endpoint of the Postlight parser API.
POSTLIGHT_API_URL = os.environ.get(
    "POSTLIGHT_API_URL", "https://postlight-parser-api-7n2iai4dea-nw.a.run.app"
)
# Source label for summaries (fixed, not configurable via the environment).
SUMMARY_SOURCE = "memri"

# Endpoints of the auxiliary services. Note that only the summarization
# default carries a trailing slash.
SEMANTIC_SEARCH_URL = os.environ.get(
    "SEMANTIC_SEARCH_URL", "https://semantic-search.dev.backend.memri.io"
)
SUMMARIZATION_URL = os.environ.get(
    "SUMMARIZATION_URL", "https://summarization.dev.backend.memri.io/"
)
MEMRI_BOT_URL = os.environ.get("MEMRI_BOT_URL", "https://chatbot.dev.backend.memri.io")
# Default feed subscriptions as (feed URL, display name) pairs.
# Commented-out entries are feeds that were tried and excluded; the trailing
# note on each records the reason.
DEFAULT_FEEDS = [
    ("https://www.theguardian.com/international/rss", "The Guardian"),
    ("https://tinybuddha.com/emailrss", "Tiny Buddha"),  # 10 entries
    ("https://www.livescience.com/feeds/all", "Live Science"),
    ("https://www.wellandgood.com/feed/", "Well+Good"),
    ("https://www.espn.com/espn/rss/news", "ESPN"),  # Top headlines
    ("https://www.mindful.org/feed/", "Mindful.org"),
    ("https://www.space.com/feeds/all", "Space"),
    ("https://www.insider.co.uk/?service=rss", "Insider.co.uk"),
    ("https://www.calmsage.com/feed/", "Calm Sage"),  # 10 entries
    ("http://rss.cnn.com/rss/edition.rss", "CNN"),  # Top stories
    (
        "http://feeds.bbci.co.uk/news/rss.xml",
        "BBC News",
    ),
    (
        "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
        "NY Times",
    ),
    (
        "https://www.reutersagency.com/feed/?taxonomy=best-topics&post_type=best",
        "Reuters",
    ),
    ("https://decider.com/feed/", "Decider"),  # 10 entries
    ("https://rss.punchng.com/v1/category/latest_news", "PUNCH"),  # 30 entries
    ("https://www.dezeen.com/feed/", "Dezeen"),  # 50 entries
    ("https://www.avclub.com/rss", "The AV Club"),
    ("https://kotaku.com/rss", "Kotaku"),
    ("https://gizmodo.com/rss", "Gizmodo"),
    ("https://feeds.feedburner.com/yankodesign", "Yanko Design"),
    ("https://feeds.feedburner.com/ign/all", "IGN"),
    ("https://feeds.macrumors.com/MacRumors-All", "MacRumors"),
    ("https://phys.org/rss-feed/", "Phys.org"),
    ("https://bgr.com/news/feed/", "BGR"),
    (
        "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114",
        # NOTE(review): the name may be a typo for "CNBC" -- confirm before
        # changing, since the string may already be stored downstream.
        "CNBCE",
    ),  # Top news
    # ("https://www.buzzfeed.com/index.xml", "BuzzFeed"),  # Contains :/// notation
    ("https://techcrunch.com/feed/", "TechCrunch"),
    ("https://thenextweb.com/feed", "The Next Web"),
    ("https://venturebeat.com/feed/", "VentureBeat"),
    ("https://www.wired.com/feed/rss", "Wired"),
    ("https://www.theverge.com/rss/index.xml", "The Verge"),
    # ("https://www.reddit.com/top.rss?t=day", "Reddit"),
    # ("https://thehill.com/homenews/feed/", "The Hill"),  # Requires user-agent string in headers
    # ("https://mindfulminutes.com/feed/", "Mindful Minutes"),  # 520 status code, possibly requires headers
    # ("https://api.axios.com/feed/", "Axios"),  # Requires JS and cookies
    # ("https://hackernoon.com/feed", "HackerNoon"),  # corrupt rss
    # ("https://www.androidpolice.com/feed/", "AndroidPolice"),  # parser-api returns ''
    ("https://www.entrepreneur.com/latest.rss", "Entrepreneur"),
    ("https://seths.blog/feed/", "Seth's Blog"),
    ("https://feeds.feedburner.com/CalculatedRisk", "Calculated Risk"),
    ("https://feeds.feedburner.com/StrategyBusiness-AllUpdates", "Strategy Business"),
    ("https://www.whowhatwear.co.uk/rss", "Who What Wear"),
    ("https://scitechdaily.com/feed/", "SciTechDaily"),
    ("https://uproxx.com/feed/", "UPROXX"),
    ("https://steveblank.com/feed/", "Steve Blank"),
    ("https://financialpost.com/feed", "Financial Post"),
    ("https://cms.qz.com/feed/", "Quartz"),
    # ("https://www.inc.com/rss/", "Inc.com"),  # blocked by fw, requires browser-like behavior
    ("https://feeds.feedburner.com/fastcompany/headlines", "Fast Company"),
    ("https://sloanreview.mit.edu/feed/", "MIT Sloan Management"),
    ("https://moxie.foxbusiness.com/google-publisher/latest.xml", "Fox Business"),
    (
        "https://www.businessoffashion.com/arc/outboundfeeds/rss/?outputType=xml",
        "Business of Fashion",
    ),
    # ("https://www.economist.com/the-world-this-week/rss.xml", "The Economist"),  # feedparser.bozoexception: Could not parse feed h: <unknown>:2:0: syntax error
    # ("https://www.mckinsey.com/insights/rss", "McKinsey"),  # document declared as us-ascii, but parsed as utf-8
    ("https://smallbiztrends.com/feed", "Small Business Trends"),
]