A free and open-source alternative front-end for ekşi sözlük
ozgursozluk/ozgursozluk/api.py
from typing import Iterator, Optional
from dataclasses import dataclass

import flask
import requests
from bs4 import BeautifulSoup, Tag
from fake_useragent import UserAgent

from ozgursozluk.config import DEFAULT_EKSI_BASE_URL

# Lowercase Turkish letters folded to ASCII when building topic slugs.
CHARMAP = {
    "ç": "c",
    "ğ": "g",
    "ı": "i",
    "ö": "o",
    "ş": "s",
    "ü": "u",
}


@dataclass
class Agenda:
    title: str
    views: str
    pinned: bool
    permalink: str


@dataclass
class Entry:
    id: str
    content: Tag  # the entry body as parsed HTML (a bs4 Tag), not plain text
    author: str
    datetime: str
    permalink: str


@dataclass
class Topic:
    id: str
    title: str
    pagecount: int
    permalink: str
    entrys: Iterator[Entry]

    def title_id(self) -> str:
        # Build the ASCII slug used in topic URLs: "<title>--<id>", spaces dashed.
        return _unicode_tr(f"{self.title}--{self.id}".replace(" ", "-"))
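

# A minimal sketch of the slug round-trip (hypothetical values): the slug that
# title_id() produces is exactly what Eksi.get_topic() below expects as its
# title argument.
#
#     topic = Topic("31782", "özgür yazılım", 1, "...", iter(()))
#     topic.title_id()  # -> "ozgur-yazilim--31782"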


class Eksi:
    def __init__(self, base_url: str = DEFAULT_EKSI_BASE_URL) -> None:
        self.base_url = base_url
        # A random User-Agent per client instance makes the scraper harder to block.
        self.headers = {"User-Agent": UserAgent().random}

    def _get(self, endpoint: str = "/", params: Optional[dict] = None) -> requests.Response:
        response = requests.get(
            f"{self.base_url}{endpoint}", params=params, headers=self.headers
        )
        if response.status_code != 200:
            # Surface upstream failures (404, 429, ...) as the equivalent Flask error.
            flask.abort(response.status_code)
        return response

    def _get_entrys(self, soup: BeautifulSoup) -> Iterator[Entry]:
        entry_items = soup.find_all("li", id="entry-item")
        for entry in entry_items:
            a = entry.find("a", class_="entry-date permalink", href=True)
            yield Entry(
                entry.attrs["data-id"],
                entry.find("div", class_="content"),  # kept as a Tag, not text
                entry.find("a", class_="entry-author").text,
                a.text,
                self.base_url + a["href"],
            )

    def search_topic(self, q: str) -> Topic:
        # Searching via /?q=<query> lands directly on the best-matching topic page.
        response = self._get("/", {"q": q})
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        pager = soup.find("div", class_="pager")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            int(pager.attrs["data-pagecount"]) if pager is not None else 0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )

    def get_topic(self, title: str, page: int = 1) -> Topic:
        # "title" is the ASCII slug produced by Topic.title_id().
        response = self._get(f"/{title}", {"p": page})
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        pager = soup.find("div", class_="pager")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            int(pager.attrs["data-pagecount"]) if pager is not None else 0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )
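
    # Usage sketch (hypothetical query and page number): pagination goes through
    # the "p" query parameter, so page two of a found topic could be fetched as
    #
    #     topic = eksi.search_topic("özgür yazılım")
    #     page_two = eksi.get_topic(topic.title_id(), page=2)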

    def get_entry(self, id: int) -> Topic:
        # A single-entry page has no pager, so the page count is reported as 0.
        response = self._get(f"/entry/{id}")
        soup = BeautifulSoup(response.content, "html.parser")
        h1 = soup.find("h1", id="title")
        return Topic(
            h1.attrs["data-id"],
            h1.attrs["data-title"],
            0,
            self.base_url + h1.find("a", href=True)["href"],
            self._get_entrys(soup),
        )

    def get_agenda(self) -> Iterator[Agenda]:
        response = self._get()
        soup = BeautifulSoup(response.content, "html.parser")
        topic_list = soup.find("ul", class_="topic-list").find_all("a", href=True)
        for topic in topic_list:
            yield Agenda(
                topic.contents[0],
                # The view count is a second child node; pinned items have none.
                "" if len(topic.contents) < 2 else topic.contents[1],
                topic.has_attr("class"),  # only pinned links carry a class attribute
                topic["href"],
            )


def _unicode_tr(text: str) -> str:
    """Replace Turkish characters with their closest ASCII equivalents."""
    for key, value in CHARMAP.items():
        text = text.replace(key, value)
    return text
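

if __name__ == "__main__":
    # Minimal usage sketch, assuming network access to DEFAULT_EKSI_BASE_URL;
    # the search query below is a hypothetical example.
    eksi = Eksi()

    # Front-page agenda: title, view count, and pinned flag for each topic.
    for agenda in eksi.get_agenda():
        print(agenda.title, agenda.views, agenda.pinned)

    # Find a topic and walk its first page of entries.
    topic = eksi.search_topic("özgür yazılım")
    print(topic.title, topic.pagecount, topic.permalink)
    for entry in topic.entrys:
        print(entry.author, entry.datetime, entry.permalink)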