more flexible indexing

This commit is contained in:
odysseusmax 2020-08-17 13:14:02 +05:30
parent d014abb73f
commit b1f1199b76
6 changed files with 137 additions and 49 deletions

View File

@ -14,14 +14,14 @@ Hosted demo site: https://tg-index-demo.herokuapp.com/
## Deploy Guide ## Deploy Guide
* Clone to local machine. * **Clone to local machine.**
```bash ```bash
$ git clone https://github.com/odysseusmax/tg-index.git $ git clone https://github.com/odysseusmax/tg-index.git
$ cd tg-index $ cd tg-index
``` ```
* Create and activate virtual environment. * **Create and activate virtual environment.**
```bash ```bash
$ pip3 install virtualenv $ pip3 install virtualenv
@ -29,25 +29,47 @@ $ virtualenv venv
$ source venv/bin/activate $ source venv/bin/activate
``` ```
* Install dependencies. * **Install dependencies.**
```bash ```bash
$ pip3 install -U -r requirements.txt $ pip3 install -U -r requirements.txt
``` ```
* Environment Variables. * **Environment Variables.**
| **Variable Name** | **Value** | Variable Name | Value
|------------- | ------------- |------------- | -------------
| `API_ID` (required) | Telegram api_id obtained from https://my.telegram.org/apps. | `API_ID` (required) | Telegram api_id obtained from https://my.telegram.org/apps.
| `API_HASH` (required) | Telegram api_hash obtained from https://my.telegram.org/apps. | `API_HASH` (required) | Telegram api_hash obtained from https://my.telegram.org/apps.
| `CHAT_ID` (required) | Id of the telegram channel (or chat) to be indexed. Separate id's with space if you want to index more than one channel. | `INDEX_SETTINGS` (required) | See the below description.
| `SESSION_STRING` (required) | String obtained by running `$ python3 app/generate_session_string.py`. (Login with the telegram account which is a participant of the given channel (or chat).) | `SESSION_STRING` (required) | String obtained by running `$ python3 app/generate_session_string.py`. (Login with the telegram account which is a participant of the given channel (or chat).)
| `PORT` (optional) | Port on which app should listen to, defaults to 8080. | `PORT` (optional) | Port on which app should listen to, defaults to 8080.
| `HOST` (optional) | Host name on which app should listen to, defaults to 0.0.0.0. | `HOST` (optional) | Host name on which app should listen to, defaults to 0.0.0.0.
| `DEBUG` (optional) | Give some value to set logging level to debug, info by default. | `DEBUG` (optional) | Give some value to set logging level to debug, info by default.
* Run app. * **Setting value for `INDEX_SETTINGS`**
This is the general format; change the values of the corresponding fields to suit your requirements. The value must be valid JSON.
```
{
"index_all": true,
"index_private": false,
"index_group": false,
"index_channel": true,
"exclude_chats": [],
"include_chats": []
}
```
* `index_all` - Whether to consider all the chats associated with the telegram account. Value should either be `true` or `false`.
* `index_private` - Whether to index private chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `index_group` - Whether to index group chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `index_channel` - Whether to index channels. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `exclude_chats` - An array/list of chat IDs that should be ignored for indexing. Only considered if `index_all` is set to `true`.
* `include_chats` - An array/list of chat IDs to index. Only considered if `index_all` is set to `false`.
* **Run app.**
```bash ```bash
$ python3 -m app $ python3 -m app

View File

@ -12,8 +12,8 @@
"description":"Telegram api_hash obtained from https://my.telegram.org/apps.", "description":"Telegram api_hash obtained from https://my.telegram.org/apps.",
"value":"" "value":""
}, },
"CHAT_ID":{ "INDEX_SETTINGS":{
"description":"Id of the telegram channel (or chat) to be indexed. Separate id's with space if you want to index more than one channel.", "description":"Refer project docs for info.",
"value": "" "value": ""
}, },
"SESSION_STRING":{ "SESSION_STRING":{

View File

@ -38,7 +38,7 @@ async def stop(app):
async def init(): async def init():
server = web.Application() server = web.Application()
await start() await start()
setup_routes(server, Views(client)) await setup_routes(server, Views(client))
setup_jinja(server) setup_jinja(server)
server.on_cleanup.append(stop) server.on_cleanup.append(stop)
return server return server

View File

@ -1,4 +1,5 @@
import traceback import traceback
import json
import sys import sys
import os import os
@ -21,12 +22,15 @@ except (KeyError, ValueError):
sys.exit(1) sys.exit(1)
try: try:
chat_id_raw = os.environ["CHAT_ID"].strip() index_settings_str = os.environ["INDEX_SETTINGS"].strip()
chat_ids = [int(chat_id.strip()) for chat_id in chat_id_raw.split(' ')] index_settings = json.loads(index_settings_str)
alias_ids = [] '''
except (KeyError, ValueError): {"index_all": true, "index_private":false, "index_group": false, "index_channel": true, "include_chats": [], "exclude_chats": []}
'''
except:
traceback.print_exc() traceback.print_exc()
print("\n\nPlease set the CHAT_ID environment variable correctly") print("\n\nPlease set the INDEX_SETTINGS environment variable correctly")
sys.exit(1) sys.exit(1)
try: try:
@ -38,3 +42,5 @@ except (KeyError, ValueError):
host = os.environ.get("HOST", "0.0.0.0") host = os.environ.get("HOST", "0.0.0.0")
debug = bool(os.environ.get("DEBUG")) debug = bool(os.environ.get("DEBUG"))
chat_ids = []
alias_ids = []

View File

@ -1,23 +1,62 @@
import random import random
import string import string
import logging
from aiohttp import web from aiohttp import web
from .config import chat_ids, alias_ids from .config import index_settings, alias_ids, chat_ids
def setup_routes(app, handler): log = logging.getLogger(__name__)
h = handler
routes = [
web.get('/', h.home, name='home') def generate_alias_id(chat):
] chat_id = chat.id
for chat_id in chat_ids: title = chat.title
while True: while True:
alias_id = ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(len(str(chat_id)))]) alias_id = ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(len(str(chat_id)))])
if alias_id in alias_ids: if alias_id in alias_ids:
continue continue
alias_ids.append(alias_id) alias_ids.append(alias_id)
break chat_ids.append({
'chat_id': chat_id,
'alias_id': alias_id,
'title': title
})
return alias_id
async def setup_routes(app, handler):
h = handler
client = h.client
routes = [
web.get('/', h.home, name='home')
]
index_all = index_settings['index_all']
index_private = index_settings['index_private']
index_group = index_settings['index_group']
index_channel = index_settings['index_channel']
exclude_chats = index_settings['exclude_chats']
include_chats = index_settings['include_chats']
if index_all:
async for chat in client.iter_dialogs():
alias_id = None
if chat.id in exclude_chats:
continue
if chat.is_user:
if index_private:
alias_id = generate_alias_id(chat)
elif chat.is_group:
if index_group:
alias_id = generate_alias_id(chat)
else:
if index_channel:
alias_id = generate_alias_id(chat)
if not alias_id:
continue
p = f"/{alias_id}" p = f"/{alias_id}"
r = [ r = [
web.get(p, h.index), web.get(p, h.index),
@ -29,4 +68,21 @@ def setup_routes(app, handler):
web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head), web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head),
] ]
routes += r routes += r
log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}")
else:
for chat_id in include_chats:
chat = await client.get_entity(chat_id)
alias_id = generate_alias_id(chat)
p = f"/{alias_id}"
r = [
web.get(p, h.index),
web.get(p + r"/logo", h.logo),
web.get(p + r"/{id:\d+}/view", h.info),
web.get(p + r"/{id:\d+}/download", h.download_get),
web.head(p + r"/{id:\d+}/download", h.download_head),
web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get),
web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head),
]
routes += r
log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}")
app.add_routes(routes) app.add_routes(routes)

View File

@ -7,7 +7,7 @@ from telethon.tl import types
from telethon.tl.custom import Message from telethon.tl.custom import Message
from .util import get_file_name, get_human_size from .util import get_file_name, get_human_size
from .config import chat_ids, alias_ids from .config import index_settings, chat_ids
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -22,13 +22,12 @@ class Views:
@aiohttp_jinja2.template('home.html') @aiohttp_jinja2.template('home.html')
async def home(self, req): async def home(self, req):
if len(chat_ids) == 1: if len(chat_ids) == 1:
raise web.HTTPFound(f"{alias_ids[0]}") raise web.HTTPFound(f"{chat_ids[0]['alias_id']}")
chats = [] chats = []
for chat_id, alias_id in zip(chat_ids, alias_ids): for chat in chat_ids:
chat = await self.client.get_entity(chat_id)
chats.append({ chats.append({
'id': alias_id, 'id': chat['alias_id'],
'name': chat.title 'name': chat['title']
}) })
return {'chats':chats} return {'chats':chats}
@ -36,8 +35,8 @@ class Views:
@aiohttp_jinja2.template('index.html') @aiohttp_jinja2.template('index.html')
async def index(self, req): async def index(self, req):
alias_id = req.rel_url.path.split('/')[1] alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)] chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat = await self.client.get_entity(chat_id) chat_id = chat['chat_id']
log_msg = '' log_msg = ''
try: try:
offset_val = int(req.query.get('page', '1')) offset_val = int(req.query.get('page', '1'))
@ -67,6 +66,7 @@ class Views:
log.debug(log_msg) log.debug(log_msg)
results = [] results = []
for m in messages: for m in messages:
entry = None
if m.file and not isinstance(m.media, types.MessageMediaWebPage): if m.file and not isinstance(m.media, types.MessageMediaWebPage):
entry = dict( entry = dict(
file_id=m.id, file_id=m.id,
@ -87,6 +87,7 @@ class Views:
size=get_human_size(len(m.raw_text)), size=get_human_size(len(m.raw_text)),
url=req.rel_url.with_path(f"/{alias_id}/{m.id}/view") url=req.rel_url.with_path(f"/{alias_id}/{m.id}/view")
) )
if entry:
results.append(entry) results.append(entry)
prev_page = False prev_page = False
next_page = False next_page = False
@ -114,7 +115,7 @@ class Views:
'cur_page' : offset_val+1, 'cur_page' : offset_val+1,
'next_page': next_page, 'next_page': next_page,
'search': search_query, 'search': search_query,
'name' : chat.title, 'name' : chat['title'],
'logo': req.rel_url.with_path(f"/{alias_id}/logo") 'logo': req.rel_url.with_path(f"/{alias_id}/logo")
} }
@ -123,7 +124,8 @@ class Views:
async def info(self, req): async def info(self, req):
file_id = int(req.match_info["id"]) file_id = int(req.match_info["id"])
alias_id = req.rel_url.path.split('/')[1] alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)] chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
message = await self.client.get_messages(entity=chat_id, ids=file_id) message = await self.client.get_messages(entity=chat_id, ids=file_id)
if not message or not isinstance(message, Message): if not message or not isinstance(message, Message):
log.debug(f"no valid entry for {file_id} in {chat_id}") log.debug(f"no valid entry for {file_id} in {chat_id}")
@ -191,7 +193,8 @@ class Views:
async def logo(self, req): async def logo(self, req):
alias_id = req.rel_url.path.split('/')[1] alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)] chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
photo = await self.client.get_profile_photos(chat_id) photo = await self.client.get_profile_photos(chat_id)
if not photo: if not photo:
return web.Response(status=404, text="404: Chat has no profile photo") return web.Response(status=404, text="404: Chat has no profile photo")
@ -231,7 +234,8 @@ class Views:
async def handle_request(self, req, head=False, thumb=False): async def handle_request(self, req, head=False, thumb=False):
file_id = int(req.match_info["id"]) file_id = int(req.match_info["id"])
alias_id = req.rel_url.path.split('/')[1] alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)] chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
message = await self.client.get_messages(entity=chat_id, ids=file_id) message = await self.client.get_messages(entity=chat_id, ids=file_id)
if not message or not message.file: if not message or not message.file:
log.debug(f"no result for {file_id} in {chat_id}") log.debug(f"no result for {file_id} in {chat_id}")