more flexible indexing

This commit is contained in:
odysseusmax 2020-08-17 13:14:02 +05:30
parent d014abb73f
commit b1f1199b76
6 changed files with 137 additions and 49 deletions

View File

@ -14,14 +14,14 @@ Hosted demo site: https://tg-index-demo.herokuapp.com/
## Deploy Guide
* Clone to local machine.
* **Clone to local machine.**
```bash
$ git clone https://github.com/odysseusmax/tg-index.git
$ cd tg-index
```
* Create and activate virtual environment.
* **Create and activate virtual environment.**
```bash
$ pip3 install virtualenv
@ -29,25 +29,47 @@ $ virtualenv venv
$ source venv/bin/activate
```
* Install dependencies.
* **Install dependencies.**
```bash
$ pip3 install -U -r requirements.txt
```
* Environment Variables.
* **Environment Variables.**
| **Variable Name** | **Value**
| Variable Name | Value
|------------- | -------------
| `API_ID` (required) | Telegram api_id obtained from https://my.telegram.org/apps.
| `API_HASH` (required) | Telegram api_hash obtained from https://my.telegram.org/apps.
| `CHAT_ID` (required) | Id of the telegram channel (or chat) to be indexed. Separate id's with space if you want to index more than one channel.
| `INDEX_SETTINGS` (required) | See the below description.
| `SESSION_STRING` (required) | String obtained by running `$ python3 app/generate_session_string.py`. (Log in with the Telegram account which is a participant of the given channel (or chat).)
| `PORT` (optional) | Port on which the app should listen, defaults to 8080.
| `HOST` (optional) | Host name on which the app should listen, defaults to 0.0.0.0.
| `DEBUG` (optional) | Give some value to set logging level to debug, info by default.
* Run app.
* **Setting value for `INDEX_SETTINGS`**
This is the general format; change the values of the corresponding fields as per your requirements.
```
{
"index_all": true,
"index_private": false,
"index_group": false,
"index_channel": true,
"exclude_chats": [],
"include_chats": []
}
```
* `index_all` - Whether to consider all the chats associated with the telegram account. Value should either be `true` or `false`.
* `index_private` - Whether to index private chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `index_group` - Whether to index group chats. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `index_channel` - Whether to index channels. Only considered if `index_all` is set to `true`. Value should either be `true` or `false`.
* `exclude_chats` - An array/list of chat IDs that should be ignored for indexing. Only considered if `index_all` is set to `true`.
* `include_chats` - An array/list of chat IDs to index. Only considered if `index_all` is set to `false`.
* **Run app.**
```bash
$ python3 -m app

View File

@ -12,9 +12,9 @@
"description":"Telegram api_hash obtained from https://my.telegram.org/apps.",
"value":""
},
"CHAT_ID":{
"description":"Id of the telegram channel (or chat) to be indexed. Separate id's with space if you want to index more than one channel.",
"value":""
"INDEX_SETTINGS":{
"description":"Refer project docs for info.",
"value": ""
},
"SESSION_STRING":{
"description":"Your session string.",

View File

@ -38,7 +38,7 @@ async def stop(app):
async def init():
server = web.Application()
await start()
setup_routes(server, Views(client))
await setup_routes(server, Views(client))
setup_jinja(server)
server.on_cleanup.append(stop)
return server

View File

@ -1,4 +1,5 @@
import traceback
import json
import sys
import os
@ -19,14 +20,17 @@ except (KeyError, ValueError):
print("\n\nPlease set the API_ID and API_HASH environment variables correctly")
print("You can get your own API keys at https://my.telegram.org/apps")
sys.exit(1)
try:
chat_id_raw = os.environ["CHAT_ID"].strip()
chat_ids = [int(chat_id.strip()) for chat_id in chat_id_raw.split(' ')]
alias_ids = []
except (KeyError, ValueError):
index_settings_str = os.environ["INDEX_SETTINGS"].strip()
index_settings = json.loads(index_settings_str)
'''
{"index_all": true, "index_private":false, "index_group": false, "index_channel": true, "include_chats": [], "exclude_chats": []}
'''
except:
traceback.print_exc()
print("\n\nPlease set the CHAT_ID environment variable correctly")
print("\n\nPlease set the INDEX_SETTINGS environment variable correctly")
sys.exit(1)
try:
@ -38,3 +42,5 @@ except (KeyError, ValueError):
host = os.environ.get("HOST", "0.0.0.0")
debug = bool(os.environ.get("DEBUG"))
chat_ids = []
alias_ids = []

View File

@ -1,32 +1,88 @@
import random
import string
import logging
from aiohttp import web
from .config import chat_ids, alias_ids
from .config import index_settings, alias_ids, chat_ids
def setup_routes(app, handler):
log = logging.getLogger(__name__)
def generate_alias_id(chat):
    """Register *chat* under a fresh random alias and return that alias.

    The alias is a random string of ASCII letters and digits with as many
    characters as ``str(chat.id)``. Candidates that are already present in
    the module-level ``alias_ids`` list are discarded and regenerated.

    Side effects: the accepted alias is appended to ``alias_ids`` and a
    record with the keys ``'chat_id'``, ``'alias_id'`` and ``'title'`` is
    appended to ``chat_ids``.

    ``chat`` must expose ``id``. ``title`` is used when the object has one;
    otherwise a display title is derived from ``first_name``/``last_name``
    or, failing that, from the id itself.
    """
    chat_id = chat.id
    try:
        title = chat.title
    except AttributeError:
        # NOTE(review): callers also pass the result of client.get_entity();
        # presumably that can be a user object, which carries first/last
        # name instead of a title -- confirm against the Telethon types.
        name_parts = (
            getattr(chat, 'first_name', None),
            getattr(chat, 'last_name', None),
        )
        title = ' '.join(part for part in name_parts if part) or str(chat_id)

    # Both values are constant for this call, so compute them once instead
    # of on every retry of the loop below.
    alphabet = string.ascii_letters + string.digits
    alias_length = len(str(chat_id))

    while True:
        alias_id = ''.join(random.choice(alphabet) for _ in range(alias_length))
        if alias_id not in alias_ids:
            break

    alias_ids.append(alias_id)
    chat_ids.append({
        'chat_id': chat_id,
        'alias_id': alias_id,
        'title': title
    })
    return alias_id
async def setup_routes(app, handler):
h = handler
client = h.client
routes = [
web.get('/', h.home, name='home')
]
for chat_id in chat_ids:
while True:
alias_id = ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(len(str(chat_id)))])
if alias_id in alias_ids:
index_all = index_settings['index_all']
index_private = index_settings['index_private']
index_group = index_settings['index_group']
index_channel = index_settings['index_channel']
exclude_chats = index_settings['exclude_chats']
include_chats = index_settings['include_chats']
if index_all:
async for chat in client.iter_dialogs():
alias_id = None
if chat.id in exclude_chats:
continue
alias_ids.append(alias_id)
break
p = f"/{alias_id}"
r = [
web.get(p, h.index),
web.get(p + r"/logo", h.logo),
web.get(p + r"/{id:\d+}/view", h.info),
web.get(p + r"/{id:\d+}/download", h.download_get),
web.head(p + r"/{id:\d+}/download", h.download_head),
web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get),
web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head),
]
routes += r
if chat.is_user:
if index_private:
alias_id = generate_alias_id(chat)
elif chat.is_group:
if index_group:
alias_id = generate_alias_id(chat)
else:
if index_channel:
alias_id = generate_alias_id(chat)
if not alias_id:
continue
p = f"/{alias_id}"
r = [
web.get(p, h.index),
web.get(p + r"/logo", h.logo),
web.get(p + r"/{id:\d+}/view", h.info),
web.get(p + r"/{id:\d+}/download", h.download_get),
web.head(p + r"/{id:\d+}/download", h.download_head),
web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get),
web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head),
]
routes += r
log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}")
else:
for chat_id in include_chats:
chat = await client.get_entity(chat_id)
alias_id = generate_alias_id(chat)
p = f"/{alias_id}"
r = [
web.get(p, h.index),
web.get(p + r"/logo", h.logo),
web.get(p + r"/{id:\d+}/view", h.info),
web.get(p + r"/{id:\d+}/download", h.download_get),
web.head(p + r"/{id:\d+}/download", h.download_head),
web.get(p + r"/{id:\d+}/thumbnail", h.thumbnail_get),
web.head(p + r"/{id:\d+}/thumbnail", h.thumbnail_head),
]
routes += r
log.debug(f"Index added for {chat.id} :: {chat.title} at /{alias_id}")
app.add_routes(routes)

View File

@ -7,7 +7,7 @@ from telethon.tl import types
from telethon.tl.custom import Message
from .util import get_file_name, get_human_size
from .config import chat_ids, alias_ids
from .config import index_settings, chat_ids
log = logging.getLogger(__name__)
@ -22,13 +22,12 @@ class Views:
@aiohttp_jinja2.template('home.html')
async def home(self, req):
if len(chat_ids) == 1:
raise web.HTTPFound(f"{alias_ids[0]}")
raise web.HTTPFound(f"{chat_ids[0]['alias_id']}")
chats = []
for chat_id, alias_id in zip(chat_ids, alias_ids):
chat = await self.client.get_entity(chat_id)
for chat in chat_ids:
chats.append({
'id': alias_id,
'name': chat.title
'id': chat['alias_id'],
'name': chat['title']
})
return {'chats':chats}
@ -36,8 +35,8 @@ class Views:
@aiohttp_jinja2.template('index.html')
async def index(self, req):
alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)]
chat = await self.client.get_entity(chat_id)
chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
log_msg = ''
try:
offset_val = int(req.query.get('page', '1'))
@ -67,6 +66,7 @@ class Views:
log.debug(log_msg)
results = []
for m in messages:
entry = None
if m.file and not isinstance(m.media, types.MessageMediaWebPage):
entry = dict(
file_id=m.id,
@ -87,7 +87,8 @@ class Views:
size=get_human_size(len(m.raw_text)),
url=req.rel_url.with_path(f"/{alias_id}/{m.id}/view")
)
results.append(entry)
if entry:
results.append(entry)
prev_page = False
next_page = False
if offset_val:
@ -114,7 +115,7 @@ class Views:
'cur_page' : offset_val+1,
'next_page': next_page,
'search': search_query,
'name' : chat.title,
'name' : chat['title'],
'logo': req.rel_url.with_path(f"/{alias_id}/logo")
}
@ -123,7 +124,8 @@ class Views:
async def info(self, req):
file_id = int(req.match_info["id"])
alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)]
chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
message = await self.client.get_messages(entity=chat_id, ids=file_id)
if not message or not isinstance(message, Message):
log.debug(f"no valid entry for {file_id} in {chat_id}")
@ -191,7 +193,8 @@ class Views:
async def logo(self, req):
alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)]
chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
photo = await self.client.get_profile_photos(chat_id)
if not photo:
return web.Response(status=404, text="404: Chat has no profile photo")
@ -231,7 +234,8 @@ class Views:
async def handle_request(self, req, head=False, thumb=False):
file_id = int(req.match_info["id"])
alias_id = req.rel_url.path.split('/')[1]
chat_id = chat_ids[alias_ids.index(alias_id)]
chat = [i for i in chat_ids if i['alias_id'] == alias_id][0]
chat_id = chat['chat_id']
message = await self.client.get_messages(entity=chat_id, ids=file_id)
if not message or not message.file:
log.debug(f"no result for {file_id} in {chat_id}")