CoexistAI/app.py at main · ignithex/CoexistAI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
from utils.websearch_utils import *
from utils.reddit_utils import *
from utils.map import *
from fastapi import FastAPI, Request
from pydantic import BaseModel
from utils.utils import *
from utils.map import *
from utils.git_utils import *
import subprocess
from fastapi_mcp import FastApiMCP
import json
import os
from model_config import *


# Model and embedding settings. Every value is read from model_config with a
# fallback, so a partially filled config still loads.
llm_model_name = model_config.get("llm_model_name", 'google/gemma-3-12b')
llm_type = model_config.get("llm_type", 'local')
llm_tools = model_config.get("llm_tools", None)
# Look the endpoint up with the already-defaulted llm_type: indexing
# model_config['llm_type'] directly raised KeyError whenever the key was
# missing, which made the 'local' default above unreachable.
llm_base_url = openai_compatible.get(llm_type,
                                     'https://api.openai.com/v1')


# Keyword arguments forwarded to the generative model factory.
llm_kwargs = model_config.get("llm_kwargs", {'temperature': 0.1,
                                            'max_tokens': None,
                                            'timeout': None,
                                            'api_key': llm_api_key,
                                            'max_retries': 2})

# Embedding / reranker settings.
embed_kwargs = model_config.get("embed_kwargs", {})
embedding_model_name = model_config.get("embedding_model_name", "models/embedding-001")
embed_mode = model_config.get("embed_mode", "gemini")
cross_encoder_name = model_config.get("cross_encoder_name", "BAAI/bge-reranker-base")


# Start a SearxNG container unless one is already reported as running.
if is_searxng_running():
    print("SearxNG docker container is already running.")
else:
    # Detached, self-removing container: host port 30 is published to the
    # container's port 8080 and ./searxng is mounted as /etc/searxng.
    _searxng_volume = f"{os.getcwd()}/searxng:/etc/searxng"
    _searxng_command = [
        "docker", "run", "--rm", "-d",
        "-p", "30:8080",
        "-v", _searxng_volume,
        "-e", "BASE_URL=http://localhost:30/",
        "-e", "INSTANCE_NAME=my-instance",
        "searxng/searxng",
    ]
    # The exit status is not inspected, matching the best-effort start-up.
    subprocess.run(_searxng_command)

# Shared chat model used by the LLM-backed endpoints in this file.
# NOTE(review): tools are passed as None here even though llm_tools is read
# from the config earlier in this file -- confirm that ignoring it is intended.
llm = get_generative_model(
    model_name=llm_model_name,
    type=llm_type,
    base_url=llm_base_url,
    _tools=None,
    kwargs=llm_kwargs
)

# Embedding model and cross-encoder; both are handed to the /web-search handler.
hf_embeddings, cross_encoder = load_model(embedding_model_name,
                                          _embed_mode=embed_mode,
                                          cross_encoder_name=cross_encoder_name,
                                          kwargs=embed_kwargs)

# NOTE(review): text_splitter is not referenced anywhere else in the visible file.
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=128)

# 30 is presumably the SearxNG host port published by the docker command above -- TODO confirm.
searcher = SearchWeb(30)
# Captured once at import time, so these values do not refresh while the server keeps running.
date, day = get_local_data()
app = FastAPI(title='coexistai')

@app.get('/')
async def root():
    """Return a static welcome payload for the service root."""
    welcome = {"message": "Welcome to CoexistAI!"}
    return welcome

class WebSearchRequest(BaseModel):
    # Request body for POST /web-search.
    query: str  # Search query text
    rerank: bool = True  # Whether to rerank the retrieved results
    num_results: int = 3  # Requested result count; the /web-search handler caps it at 2
    local_mode: bool = False  # Whether to process local documents
    split: bool = True  # Whether to split documents into chunks
    document_paths: list[str] = []  # List of paths for local documents

class YouTubeSearchRequest(BaseModel):
    # Request body for POST /youtube-search.
    query: str  # YouTube video URL, or a search term when no URL is given
    prompt: str  # Prompt used to generate a response from the transcript
    n: int = 1  # Number of videos to summarize, default is 1

class RedditSearchRequest(BaseModel):
    # Request body for POST /reddit-search.
    # The three None-defaulted fields are declared Optional so that an explicit
    # JSON null is accepted and the generated schema marks them nullable; a bare
    # `str = None` annotation only allows the field to be omitted.
    subreddit: Optional[str] = None  # Subreddit to read from
    url_type: str = "hot"  # Listing type; the handler forces 'search' when search_query is set
    n: int = 3  # Number of posts to retrieve
    k: int = 1  # Number of comments per post to return
    custom_url: Optional[str] = None  # Custom Reddit URL
    time_filter: str = "all"  # Time filter, e.g. 'all' or 'day'
    search_query: Optional[str] = None  # Search query for Reddit posts
    sort_type: str = "relevance"  # Sorting type for the results

class MapSearchRequest(BaseModel):
    # Request body for POST /map-search.
    start_location: Optional[str] = None  # Start location can be a string or None
    end_location: Optional[str] = None  # End location can be a string or None
    pois_radius: int = 500  # Default radius for POIs in meters
    amenities: str = "restaurant|cafe|bar|hotel"  # Default amenities to search for, '|'-separated
    limit: int = 3  # Default number of results to return
    task: str = "route_and_pois"  # Default task: route plus POIs; "location_only" is the documented alternative

class WebSummarizeRequest(BaseModel):
    # Request body for POST /web-summarize.
    query: str  # Query the summary should address
    url: str  # URL of the page to summarize
    local_mode: bool = False  # Whether to process local documents

class GitTreeRequest(BaseModel):
    # Request body for POST /git-tree-search.
    repobaseurl: str  # GitHub repository base URL, or path to a local repository

class GitSearchRequest(BaseModel):
    # Request body for POST /git-search.
    repobaseurl: str  # GitHub repository base URL, or path to a local repository root
    parttoresearch: str  # Path inside the repository to fetch (directory or file)
    query: str  # User query to answer from the fetched content
    type: str  # "Folder" or "file", per the endpoint documentation

class LocalFolderTreeRequest(BaseModel):
    # Request body for POST /local-folder-tree.
    folder_path:str  # Root directory to render
    level: str = 'broad-first'  # Depth mode: 'full', 'broad-first' or 'broad-second'
    prefix: str = ''  # Indentation prefix (documented as internal)

class ResearchCheckRequest(BaseModel):
    # Request body for POST /check-response.
    query: str  # The user's original query
    toolsshorthand: str  # Bulleted facts/information collected from earlier tool calls

class ClickableElementRequest(BaseModel):
    # Request body for POST /clickable-elements.
    url:str  # Page whose clickable elements are inspected
    query:str  # Query used to filter the clickable elements
    topk:int=10  # Number of top elements to return

@app.post('/clickable-elements', operation_id="get_website_structure")
async def get_website_structure(request: ClickableElementRequest):
    """
    Return the top-k clickable elements of a page that best match a query.

    Use this to check whether a page links to anything relevant to the query
    and to discover deeper links, since related information is often linked
    together.

    Args:
        url (str): The URL to search for clickable elements.
        query (str): The query to filter the clickable elements.
        topk (int): The number of top clickable elements to return.
    Returns:
        list: A list of dictionaries containing the title, URL, and score of each clickable element.
    """
    page_url, search_terms, limit = request.url, request.query, request.topk
    ranked_elements = await get_topk_bm25_clickable_elements(page_url, search_terms, limit)
    return ranked_elements

@app.post('/local-folder-tree', operation_id="get_local_folder_tree")
async def get_local_folder_tree(request: LocalFolderTreeRequest):
    """
    Build a Markdown tree of a local folder.

    Args:
        folder_path (str): Root directory.
        level (str):
            - 'full': Show all folders and files, recursively, except hidden/system/cache entries.
            - 'broad-first': Only show immediate (top-level) folders and files (no nesting).
            - 'broad-second': Show top-level folders/files and their immediate child folders/files (two levels, no deeper).
        prefix (str): Indentation (internal)
    Returns:
        str: Markdown tree string
    """
    tree_options = {"level": request.level, "prefix": request.prefix}
    markdown_tree = await folder_tree(request.folder_path, **tree_options)
    return markdown_tree

@app.post('/git-tree-search', operation_id="get_git_tree")
async def get_git_tree(request: GitTreeRequest):
    """
    Return the directory tree of a GitHub repository or a local Git repository.

    Args:
        repobaseurl (str): The base URL of the GitHub repository
            (e.g., 'https://github.com/user/repo') or the path to the local
            repository on your system.

    Returns:
        str: The directory tree structure as a string.
    """
    repository = request.repobaseurl
    tree = await git_tree_search(repository)
    return tree

@app.post('/git-search', operation_id="get_git_search")
async def get_git_search(request: GitSearchRequest):
    """
    Fetch one part (directory or file) of a repository and answer the user's query from it.

    The repository can be a GitHub repository (via URL) or a local Git
    repository (via local path). Use get_git_tree first to understand the
    structure of the repo and decide which part is useful for the query.

    Args:
        repobaseurl (str): The base URL of the GitHub repository (e.g., 'https://github.com/user/repo'),
            or the local path to the root of the repository.
        parttoresearch (str): The path inside the repository you wish to access
            (e.g., 'basefolder/subfolder'). Use get_git_tree to find it if needed.
        query (str): The user's query.
        type (str): "Folder" or "file"
    Returns:
        str: Response to the user's query based on the content fetched.
    """
    # Local import keeps this handler self-contained; asyncio is stdlib.
    import asyncio

    content = await git_specific_content(request.repobaseurl, request.parttoresearch, request.type)
    prompt = f"""You are a professional coder, your task is to answer the users query based on the content fetched from git repo
User Query: {request.query}
Fetched Content: {content}
"""

    # llm.invoke is a synchronous call; running it directly inside this
    # coroutine would stall the event loop (and every other request) for the
    # whole model round-trip, so it is executed in a worker thread instead.
    result = await asyncio.to_thread(llm.invoke, prompt)
    return result.content

@app.post('/web-search', operation_id="get_web_search")
async def websearch(request: WebSearchRequest):
    """
    Performs a web search and retrieves results, then generates a response based on those results.
    It also throws back the next steps, you should carry out your research until there are no next steps left.
    Args:
        query (str): The input query.
        rerank (bool): Whether to rerank results.
        num_results (int, optional): Number of search results to retrieve. Defaults to 3; values above 2 are capped at 2.
        document_paths (list of str, optional): List of paths for local documents/folders. Defaults to empty list. for an example [path1,path2,path3]. if different tasks are related to different documents
        local_mode (bool, optional): Whether to process local documents. Defaults to False.
        split (bool, optional): Whether to split documents into chunks. Defaults to True.

    Returns:
        str: "result:<generated response>" followed by a blank line and "sources:<sources>".
    """
    # NOTE(review): date/day are module-level values captured at import time;
    # confirm whether they should be refreshed per request.
    result = await query_web_response(
        query=request.query,
        date=date,
        day=day,
        websearcher=searcher,
        hf_embeddings=hf_embeddings,
        rerank=request.rerank,
        cross_encoder=cross_encoder,
        model=llm,
        text_model=llm,
        # Existing behaviour: never forward more than 2 results.
        num_results=min(2, request.num_results),
        document_paths=request.document_paths,
        local_mode=request.local_mode,
        split=request.split
    )
    # Only the first two elements (response, sources) are returned. An f-string
    # produces the same text as `+` for str values and does not raise TypeError
    # if either element is not a str.
    answer, sources = result[0], result[1]
    return f"result:{answer}\n\nsources:{sources}"

@app.post('/web-summarize', operation_id="get_web_summarize")
async def websummarize(request: WebSummarizeRequest):
    """Generates a summary of a web page based on the provided query and URL.
    Args:
        query (str): The input query.
        url (str): The URL of the web page to summarize.
        local_mode (bool): Whether to process local documents.
    Returns:
        dict: A dictionary containing the generated summary and sources."""
    # The shared module-level llm is always used; callers cannot pick a model.
    summary_args = {
        "query": request.query,
        "url": request.url,
        "model": llm,
        "local_mode": request.local_mode,
    }
    summary = await summary_of_url(**summary_args)
    return summary

@app.post('/youtube-search', operation_id="get_youtube_search")
async def youtube_search(request: YouTubeSearchRequest):
    """Performs a YouTube search and returns summaries of the results.
    Args:
        query (str): The YouTube video URL if provided else search term
        prompt (str): The prompt to generate a response from the transcript.
        n (int): Number of videos to summarize if search term is provided instead of URL.
    Returns:
        dict: response from the YouTube transcripts based on the given query"""
    # NOTE(review): the helper is called without await, so it presumably runs
    # synchronously inside this coroutine -- confirm.
    video_count = request.n
    summaries = youtube_transcript_response(
        request.query, request.prompt, n=video_count, model=llm
    )
    return summaries

@app.post('/reddit-search', operation_id="get_reddit_search")
async def reddit_search(request: RedditSearchRequest):
    """Performs a Reddit search and retrieves posts based on the provided parameters.
    Args:
        subreddit (str): The subreddit to search in.
        url_type (str): The type of Reddit URL to fetch (e.g., 'search','hot', 'new','top','best','controversial','rising').
                        It is treated as 'search' whenever search_query is provided.
        n (int): Number of posts to retrieve.
        k (int): Number of comments on each post to return after processing. When more perspectives needed increase this.
        custom_url (str): Custom URL for Reddit search.
        time_filter (str): Time filter for the search (e.g., 'all', 'day').
        search_query (str): Search query for Reddit posts.
        sort_type (str): Sorting type for the results.
    Returns:
        dict: A dictionary containing the results of the Reddit search."""
    # A non-empty search query always selects the 'search' listing.
    effective_url_type = 'search' if request.search_query else request.url_type
    reader_args = dict(
        subreddit=request.subreddit,
        url_type=effective_url_type,
        n=request.n,
        k=request.k,
        custom_url=request.custom_url,
        time_filter=request.time_filter,
        search_query=request.search_query,
        sort_type=request.sort_type,
        model=llm,
    )
    posts = reddit_reader_response(**reader_args)
    return posts

@app.post('/map-search', operation_id="get_map_search")
async def map_search(request: MapSearchRequest):
    """Performs a map search and retrieves the route and points of interest (POIs) between two locations.
    Args:
        start_location (optional str): The starting location for the route. Can be None as well.
        end_location (optional str): The destination location for the route. Can be None as well.
        pois_radius (int): Radius in meters to search for points of interest around the route.
        amenities (str): Types of amenities to search for, separated by '|'. For example, "restaurant|cafe|bar|hotel".
        limit (int): Maximum number of POIs to return.
        task (str): The task to perform, either "location_only" - if lat long of start and end location is needed,
            else by default is "route_and_pois" - if route and POIs are needed.
    Returns:
        dict: location or route and POIs or both"""
    origin, destination = request.start_location, request.end_location
    map_options = {
        "pois_radius": request.pois_radius,
        "amenities": request.amenities,
        "limit": request.limit,
        "task": request.task,
    }
    return generate_map(origin, destination, **map_options)

@app.post('/check-response', operation_id="get_response_check")
async def check_response(request: ResearchCheckRequest):
    """
    Evaluates whether the agent's collected information is complete for writing answer to the user's query.
    If any aspect is missing, list them all in bullet format
    Args:
        query (str): The user's original query.
        toolsshorthand (str):  Exact Facts/Information collected in bullets from every past tool usage which would be useful to answer
    Returns:
        str: Suggestions for improvement or confirmation that all aspects are addressed.
    """
    # Local import keeps this handler self-contained; asyncio is stdlib.
    import asyncio

    system_prompt = f"""You are a professional researcher.
Review the following user query and the agent's short hand of informations collected.
If not explicitly asked for deep research, you should just check if most necessary information and all aspects present in query are covered, NO NEED TO SUGGEST EXTRA, SINCE ITS QUICK QUERY
Determine if the shorthand fully addresses every aspect and intent of the query.
If any part is missing or could be improved, list the specific aspects or suggestions for further research or value addition.(IF DEEP RESEARCH ASKED EXPLICITLY)
If the response is complete, state that all aspects have been addressed.

User Query: {request.query}
Agent Shorthand: {request.toolsshorthand}
"""

    # llm.invoke is synchronous; calling it directly in this coroutine would
    # block the event loop for the whole model round-trip, so it is run in a
    # worker thread and awaited.
    result = await asyncio.to_thread(llm.invoke, system_prompt)
    return result.content

# operation_id values of the endpoints that are exposed through MCP.
_MCP_OPERATION_IDS = [
    'get_web_search',
    'get_web_summarize',
    'get_youtube_search',
    'get_reddit_search',
    'get_map_search',
    "get_git_tree",
    "get_git_search",
    "get_local_folder_tree",
    "get_response_check",
    "get_website_structure",
]

mcp = FastApiMCP(app, include_operations=_MCP_OPERATION_IDS)
mcp.mount()