From b1f1199b7603920f8f0c81d8993e016d0469c2c2 Mon Sep 17 00:00:00 2001 From: odysseusmax Date: Mon, 17 Aug 2020 13:14:02 +0530 Subject: [PATCH] more flexible indexing --- README.md | 36 +++++++++++++++---- app.json | 6 ++-- app/__main__.py | 2 +- app/config.py | 18 ++++++---- app/routes.py | 94 +++++++++++++++++++++++++++++++++++++++---------- app/views.py | 30 +++++++++------- 6 files changed, 137 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index bca9271..5d3c2e4 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,14 @@ Hosted demo site: https://tg-index-demo.herokuapp.com/ ## Deploy Guide -* Clone to local machine. +* **Clone to local machine.** ```bash $ git clone https://github.com/odysseusmax/tg-index.git $ cd tg-index ``` -* Create and activate virtual environment. +* **Create and activate virtual environment.** ```bash $ pip3 install virtualenv @@ -29,25 +29,47 @@ $ virtualenv venv $ source venv/bin/activate ``` -* Install dependencies. +* **Install dependencies.** ```bash $ pip3 install -U -r requirements.txt ``` -* Environment Variables. +* **Environment Variables.** -| **Variable Name** | **Value** +| Variable Name | Value |------------- | ------------- | `API_ID` (required) | Telegram api_id obtained from https://my.telegram.org/apps. | `API_HASH` (required) | Telegram api_hash obtained from https://my.telegram.org/apps. -| `CHAT_ID` (required) | Id of the telegram channel (or chat) to be indexed. Separate id's with space if you want to index more than one channel. +| `INDEX_SETTINGS` (required) | See the below description. | `SESSION_STRING` (required) | String obtained by running `$ python3 app/generate_session_string.py`. (Login with the telegram account which is a participant of the given channel (or chat). | `PORT` (optional) | Port on which app should listen to, defaults to 8080. | `HOST` (optional) | Host name on which app should listen to, defaults to 0.0.0.0. 
| `DEBUG` (optional) | Give some value to set logging level to debug, info by default. -* Run app. +* **Setting value for `INDEX_SETTINGS`** + +This is the general format; change the values of the corresponding fields to match your requirements. + +``` +{ + "index_all": true, + "index_private":false, + "index_group": false, + "index_channel": true, + "exclude_chats": [], + "include_chats": [] +} +``` +* `index_all` - Whether to consider all the chats associated with the telegram account. Value should either be `true` or `false`. +* `index_private` - Whether to index private chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`. +* `index_group` - Whether to index group chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`. +* `index_channel` - Whether to index channels. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`. +* `exclude_chats` - An array/list of chat id's that should be ignored for indexing. Only considered if `index_all` is set to `true`. +* `include_chats` - An array/list of chat id's to index. Only considered if `index_all` is set to `false`. + + +* **Run app.** ```bash $ python3 -m app diff --git a/app.json b/app.json index 4024beb..60628db 100644 --- a/app.json +++ b/app.json @@ -12,9 +12,9 @@ "description":"Telegram api_hash obtained from https://my.telegram.org/apps.", "value":"" }, - "CHAT_ID":{ - "description":"Id of the telegram channel (or chat) to be indexed. 
Separate id's with space if you want to index more than one channel.", - "value":"" + "INDEX_SETTINGS":{ + "description":"Refer project docs for info.", + "value": "" }, "SESSION_STRING":{ "description":"Your session string.", diff --git a/app/__main__.py b/app/__main__.py index 80d3dbe..a024273 100644 --- a/app/__main__.py +++ b/app/__main__.py @@ -38,7 +38,7 @@ async def stop(app): async def init(): server = web.Application() await start() - setup_routes(server, Views(client)) + await setup_routes(server, Views(client)) setup_jinja(server) server.on_cleanup.append(stop) return server diff --git a/app/config.py b/app/config.py index 24daee3..575f87c 100644 --- a/app/config.py +++ b/app/config.py @@ -1,4 +1,5 @@ import traceback +import json import sys import os @@ -19,14 +20,17 @@ except (KeyError, ValueError): print("\n\nPlease set the API_ID and API_HASH environment variables correctly") print("You can get your own API keys at https://my.telegram.org/apps") sys.exit(1) - + try: - chat_id_raw = os.environ["CHAT_ID"].strip() - chat_ids = [int(chat_id.strip()) for chat_id in chat_id_raw.split(' ')] - alias_ids = [] -except (KeyError, ValueError): + index_settings_str = os.environ["INDEX_SETTINGS"].strip() + index_settings = json.loads(index_settings_str) + ''' + {"index_all": true, "index_private":false, "index_group": false, "index_channel": true, "include_chats": [], "exclude_chats": []} + + ''' +except: traceback.print_exc() - print("\n\nPlease set the CHAT_ID environment variable correctly") + print("\n\nPlease set the INDEX_SETTINGS environment variable correctly") sys.exit(1) try: @@ -38,3 +42,5 @@ except (KeyError, ValueError): host = os.environ.get("HOST", "0.0.0.0") debug = bool(os.environ.get("DEBUG")) +chat_ids = [] +alias_ids = [] diff --git a/app/routes.py b/app/routes.py index 538a138..63a29c2 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,32 +1,88 @@ import random import string +import logging from aiohttp import web -from .config import 
chat_ids, alias_ids +from .config import index_settings, alias_ids, chat_ids -def setup_routes(app, handler): +log = logging.getLogger(__name__) + + +def generate_alias_id(chat): + chat_id = chat.id + title = chat.title + while True: + alias_id = ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(len(str(chat_id)))]) + if alias_id in alias_ids: + continue + alias_ids.append(alias_id) + chat_ids.append({ + 'chat_id': chat_id, + 'alias_id': alias_id, + 'title': title + }) + return alias_id + + +async def setup_routes(app, handler): h = handler + client = h.client routes = [ web.get('/', h.home, name='home') ] - for chat_id in chat_ids: - while True: - alias_id = ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(len(str(chat_id)))]) - if alias_id in alias_ids: + index_all = index_settings['index_all'] + index_private = index_settings['index_private'] + index_group = index_settings['index_group'] + index_channel = index_settings['index_channel'] + exclude_chats = index_settings['exclude_chats'] + include_chats = index_settings['include_chats'] + if index_all: + async for chat in client.iter_dialogs(): + alias_id = None + if chat.id in exclude_chats: continue - alias_ids.append(alias_id) - break - p = f"/{alias_id}" - r = [ - web.get(p, h.index), - web.get(p + r"/logo", h.logo), - web.get(p + r"/{id:\d+}/view", h.info), - web.get(p + r"/{id:\d+}/download", h.download_get), - web.head(p + r"/{id:\d+}/download", h.download_head), - web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get), - web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head), - ] - routes += r + + if chat.is_user: + if index_private: + alias_id = generate_alias_id(chat) + elif chat.is_group: + if index_group: + alias_id = generate_alias_id(chat) + else: + if index_channel: + alias_id = generate_alias_id(chat) + + if not alias_id: + continue + + p = f"/{alias_id}" + r = [ + web.get(p, h.index), + web.get(p + r"/logo", h.logo), + web.get(p + 
r"/{id:\d+}/view", h.info), + web.get(p + r"/{id:\d+}/download", h.download_get), + web.head(p + r"/{id:\d+}/download", h.download_head), + web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get), + web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head), + ] + routes += r + log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}") + else: + for chat_id in include_chats: + chat = await client.get_entity(chat_id) + alias_id = generate_alias_id(chat) + p = f"/{alias_id}" + r = [ + web.get(p, h.index), + web.get(p + r"/logo", h.logo), + web.get(p + r"/{id:\d+}/view", h.info), + web.get(p + r"/{id:\d+}/download", h.download_get), + web.head(p + r"/{id:\d+}/download", h.download_head), + web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get), + web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head), + ] + routes += r + log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}") app.add_routes(routes) diff --git a/app/views.py b/app/views.py index 65437ed..eafac1c 100644 --- a/app/views.py +++ b/app/views.py @@ -7,7 +7,7 @@ from telethon.tl import types from telethon.tl.custom import Message from .util import get_file_name, get_human_size -from .config import chat_ids, alias_ids +from .config import index_settings, chat_ids log = logging.getLogger(__name__) @@ -22,13 +22,12 @@ class Views: @aiohttp_jinja2.template('home.html') async def home(self, req): if len(chat_ids) == 1: - raise web.HTTPFound(f"{alias_ids[0]}") + raise web.HTTPFound(f"{chat_ids[0]['alias_id']}") chats = [] - for chat_id, alias_id in zip(chat_ids, alias_ids): - chat = await self.client.get_entity(chat_id) + for chat in chat_ids: chats.append({ - 'id': alias_id, - 'name': chat.title + 'id': chat['alias_id'], + 'name': chat['title'] }) return {'chats':chats} @@ -36,8 +35,8 @@ class Views: @aiohttp_jinja2.template('index.html') async def index(self, req): alias_id = req.rel_url.path.split('/')[1] - chat_id = chat_ids[alias_ids.index(alias_id)] - chat = await 
self.client.get_entity(chat_id) + chat = [i for i in chat_ids if i['alias_id'] == alias_id][0] + chat_id = chat['chat_id'] log_msg = '' try: offset_val = int(req.query.get('page', '1')) @@ -67,6 +66,7 @@ class Views: log.debug(log_msg) results = [] for m in messages: + entry = None if m.file and not isinstance(m.media, types.MessageMediaWebPage): entry = dict( file_id=m.id, @@ -87,7 +87,8 @@ class Views: size=get_human_size(len(m.raw_text)), url=req.rel_url.with_path(f"/{alias_id}/{m.id}/view") ) - results.append(entry) + if entry: + results.append(entry) prev_page = False next_page = False if offset_val: @@ -114,7 +115,7 @@ class Views: 'cur_page' : offset_val+1, 'next_page': next_page, 'search': search_query, - 'name' : chat.title, + 'name' : chat['title'], 'logo': req.rel_url.with_path(f"/{alias_id}/logo") } @@ -123,7 +124,8 @@ class Views: async def info(self, req): file_id = int(req.match_info["id"]) alias_id = req.rel_url.path.split('/')[1] - chat_id = chat_ids[alias_ids.index(alias_id)] + chat = [i for i in chat_ids if i['alias_id'] == alias_id][0] + chat_id = chat['chat_id'] message = await self.client.get_messages(entity=chat_id, ids=file_id) if not message or not isinstance(message, Message): log.debug(f"no valid entry for {file_id} in {chat_id}") @@ -191,7 +193,8 @@ class Views: async def logo(self, req): alias_id = req.rel_url.path.split('/')[1] - chat_id = chat_ids[alias_ids.index(alias_id)] + chat = [i for i in chat_ids if i['alias_id'] == alias_id][0] + chat_id = chat['chat_id'] photo = await self.client.get_profile_photos(chat_id) if not photo: return web.Response(status=404, text="404: Chat has no profile photo") @@ -231,7 +234,8 @@ class Views: async def handle_request(self, req, head=False, thumb=False): file_id = int(req.match_info["id"]) alias_id = req.rel_url.path.split('/')[1] - chat_id = chat_ids[alias_ids.index(alias_id)] + chat = [i for i in chat_ids if i['alias_id'] == alias_id][0] + chat_id = chat['chat_id'] message = await 
self.client.get_messages(entity=chat_id, ids=file_id) if not message or not message.file: log.debug(f"no result for {file_id} in {chat_id}")