A free and open-source alternative front-end for ekşi sözlük
ozgursozluk/ozgursozluk/api.py
from typing import Iterator, Optional
from dataclasses import dataclass

import flask
import requests
from bs4 import BeautifulSoup, Tag
from fake_useragent import UserAgent

from ozgursozluk.config import DEFAULT_EKSI_BASE_URL

# Lowercase Turkish letters folded to ASCII when building topic slugs.
CHARMAP = {
    "ç": "c",
    "ğ": "g",
    "ı": "i",
    "ö": "o",
    "ş": "s",
    "ü": "u",
}


@dataclass
class Agenda:
    title: str
    views: str
    pinned: bool
    permalink: str


@dataclass
class Entry:
    id: str
    content: Tag  # the entry body as parsed HTML (a bs4 Tag), not plain text
    author: str
    datetime: str
    permalink: str


@dataclass
class Topic:
    id: str
    title: str
    pagecount: int
    permalink: str
    entrys: Iterator[Entry]

    def title_id(self) -> str:
        # Build the ASCII slug used in topic URLs: "<title>--<id>", spaces dashed.
        return _unicode_tr(f"{self.title}--{self.id}".replace(" ", "-"))
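

# A minimal sketch of the slug round-trip (hypothetical values): the slug that
# title_id() produces is exactly what Eksi.get_topic() below expects as its
# title argument.
#
#     topic = Topic("31782", "özgür yazılım", 1, "...", iter(()))
#     topic.title_id()  # -> "ozgur-yazilim--31782"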


class Eksi:
    def __init__(self, base_url: str = DEFAULT_EKSI_BASE_URL) -> None:
        self.base_url = base_url
        # A random User-Agent per client instance makes the scraper harder to block.
        self.headers = {"User-Agent": UserAgent().random}

    def _get(self, endpoint: str = "/", params: Optional[dict] = None) -> requests.Response:
        response = requests.get(
            f"{self.base_url}{endpoint}", params=params, headers=self.headers
        )
        if response.status_code != 200:
            # Surface upstream failures (404, 429, ...) as the equivalent Flask error.
            flask.abort(response.status_code)
        return response

    def _get_entrys(self, soup: BeautifulSoup) -> Iterator[Entry]:
        entry_items = soup.find_all("li", id="entry-item")
        for entry in entry_items:
            a = entry.find("a", class_="entry-date permalink", href=True)
            yield Entry(
                entry.attrs["data-id"],
                entry.find("div", class_="content"),  # kept as a Tag, not text
                entry.find("a", class_="entry-author").text,
                a.text,
                self.base_url + a["href"],
            )

    def search_topic(self, q: str) -> Topic:
        # Searching via /?q=<query> lands directly on the best-matching topic page.
        response = self._get("/", {"q": q})
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        pager = soup.find("div", class_="pager")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            int(pager.attrs["data-pagecount"]) if pager is not None else 0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )

    def get_topic(self, title: str, page: int = 1) -> Topic:
        # "title" is the ASCII slug produced by Topic.title_id().
        response = self._get(f"/{title}", {"p": page})
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        pager = soup.find("div", class_="pager")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            int(pager.attrs["data-pagecount"]) if pager is not None else 0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )
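
    # Usage sketch (hypothetical query and page number): pagination goes through
    # the "p" query parameter, so page two of a found topic could be fetched as
    #
    #     topic = eksi.search_topic("özgür yazılım")
    #     page_two = eksi.get_topic(topic.title_id(), page=2)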

    def get_entry(self, id: int) -> Topic:
        # A single-entry page has no pager, so the page count is reported as 0.
        response = self._get(f"/entry/{id}")
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )

    def get_agenda(self) -> Iterator[Agenda]:
        response = self._get()
        soup = BeautifulSoup(response.content, "html.parser")
        topic_list = soup.find("ul", class_="topic-list").find_all("a", href=True)
        for topic in topic_list:
            yield Agenda(
                topic.contents[0],
                # The view count is a second child node; pinned items have none.
                "" if len(topic.contents) < 2 else topic.contents[1],
                topic.has_attr("class"),  # only pinned links carry a class attribute
                topic["href"],
            )


def _unicode_tr(text: str) -> str:
    """Replace Turkish characters with their closest ASCII equivalents."""
    for key, value in CHARMAP.items():
        text = text.replace(key, value)
    return text
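

if __name__ == "__main__":
    # Minimal usage sketch, assuming network access to DEFAULT_EKSI_BASE_URL;
    # the search query below is a hypothetical example.
    eksi = Eksi()

    # Front-page agenda: title, view count, and pinned flag for each topic.
    for agenda in eksi.get_agenda():
        print(agenda.title, agenda.views, agenda.pinned)

    # Find a topic and walk its first page of entries.
    topic = eksi.search_topic("özgür yazılım")
    print(topic.title, topic.pagecount, topic.permalink)
    for entry in topic.entrys:
        print(entry.author, entry.datetime, entry.permalink)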