@@ -139,10 +139,20 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\"Enter your Elasticsearch URL: \")\n",
-"os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\"ELASTICSEARCH_API_KEY\") or getpass(\"Enter your Elasticsearch API key: \")\n",
-"os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\"Enter your Ngrok Token: \")\n",
-"os.environ[\"ELASTICSEARCH_INDEX\"] = os.environ.get(\"ELASTICSEARCH_INDEX\") or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \") or \"github_internal\"\n",
+"os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\n",
+"    \"Enter your Elasticsearch URL: \"\n",
+")\n",
+"os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\n",
+"    \"ELASTICSEARCH_API_KEY\"\n",
+") or getpass(\"Enter your Elasticsearch API key: \")\n",
+"os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\n",
+"    \"Enter your Ngrok Token: \"\n",
+")\n",
+"os.environ[\"ELASTICSEARCH_INDEX\"] = (\n",
+"    os.environ.get(\"ELASTICSEARCH_INDEX\")\n",
+"    or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \")\n",
+"    or \"github_internal\"\n",
+")\n",
 "\n",
 "ELASTICSEARCH_URL = os.environ[\"ELASTICSEARCH_URL\"]\n",
 "ELASTICSEARCH_API_KEY = os.environ[\"ELASTICSEARCH_API_KEY\"]\n",
@@ -177,10 +187,7 @@
 },
 "outputs": [],
 "source": [
-"es_client = Elasticsearch(\n",
-" ELASTICSEARCH_URL,\n",
-" api_key=ELASTICSEARCH_API_KEY\n",
-")\n",
+"es_client = Elasticsearch(ELASTICSEARCH_URL, api_key=ELASTICSEARCH_API_KEY)\n",
 "\n",
 "if es_client.ping():\n",
 " print(\"Elasticsearch connection successful\")\n",
@@ -225,7 +232,7 @@
 " \"text\": {\"type\": \"text\"},\n",
 " \"text_semantic\": {\n",
 " \"type\": \"semantic_text\",\n",
-" \"inference_id\": \".elser-2-elasticsearch\"\n",
+" \"inference_id\": \".elser-2-elasticsearch\",\n",
 " },\n",
 " \"url\": {\"type\": \"keyword\"},\n",
 " \"type\": {\"type\": \"keyword\"},\n",
@@ -235,14 +242,14 @@
 " \"created_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n",
 " \"resolved_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n",
 " \"labels\": {\"type\": \"keyword\"},\n",
-" \"related_pr\": {\"type\": \"keyword\"}\n",
+" \"related_pr\": {\"type\": \"keyword\"},\n",
 " }\n",
 " }\n",
-" }\n",
+" },\n",
 " )\n",
 " print(f\"Index '{INDEX_NAME}' created successfully\")\n",
 "except Exception as e:\n",
-" if 'resource_already_exists_exception' in str(e):\n",
+" if \"resource_already_exists_exception\" in str(e):\n",
 " print(f\"Index '{INDEX_NAME}' already exists\")\n",
 " else:\n",
 " print(f\"Error creating index: {e}\")"
@@ -629,10 +636,10 @@
 }
 ],
 "source": [
-"file_path = 'github_internal_dataset.json'\n",
+"file_path = \"github_internal_dataset.json\"\n",
 "df = pd.read_json(file_path)\n",
 "\n",
-"documents = df.to_dict('records')\n",
+"documents = df.to_dict(\"records\")\n",
 "print(f\"Loaded {len(documents)} documents from dataset\")\n",
 "\n",
 "df"
@@ -663,11 +670,9 @@
 "source": [
 "def generate_actions():\n",
 " for doc in documents:\n",
-" doc['text_semantic'] = doc['text']\n",
-" yield {\n",
-" '_index': INDEX_NAME,\n",
-" '_source': doc\n",
-" }\n",
+" doc[\"text_semantic\"] = doc[\"text\"]\n",
+" yield {\"_index\": INDEX_NAME, \"_source\": doc}\n",
 "\n",
+"\n",
 "try:\n",
 " success, errors = bulk(es_client, generate_actions())\n",
@@ -679,7 +684,7 @@
 " print(\"Waiting 15 seconds for ELSER to process documents...\")\n",
 " time.sleep(15)\n",
 "\n",
-" count = es_client.count(index=INDEX_NAME)['count']\n",
+" count = es_client.count(index=INDEX_NAME)[\"count\"]\n",
 " print(f\"Total documents in index: {count}\")\n",
 "\n",
 "except Exception as e:\n",
@@ -725,10 +730,10 @@
 "Use search to find relevant issues/PRs, then fetch to get complete details.\n",
 "\"\"\"\n",
 "\n",
+"\n",
 "def create_server():\n",
 " mcp = FastMCP(\n",
-" name=\"Elasticsearch GitHub Issues MCP\",\n",
-" instructions=server_instructions\n",
+" name=\"Elasticsearch GitHub Issues MCP\", instructions=server_instructions\n",
 " )\n",
 "\n",
 " @mcp.tool()\n",
@@ -757,7 +762,7 @@
 " \"query\": {\n",
 " \"semantic\": {\n",
 " \"field\": \"text_semantic\",\n",
-" \"query\": query\n",
+" \"query\": query,\n",
 " }\n",
 " }\n",
 " }\n",
@@ -774,31 +779,33 @@
 " \"assignee^2\",\n",
 " \"type\",\n",
 " \"labels\",\n",
-" \"priority\"\n",
+" \"priority\",\n",
 " ],\n",
 " \"type\": \"best_fields\",\n",
-" \"fuzziness\": \"AUTO\"\n",
+" \"fuzziness\": \"AUTO\",\n",
 " }\n",
 " }\n",
 " }\n",
-" }\n",
+" },\n",
 " ],\n",
 " \"rank_window_size\": 50,\n",
-" \"rank_constant\": 60\n",
+" \"rank_constant\": 60,\n",
 " }\n",
-" }\n",
+" },\n",
 " )\n",
 "\n",
 " # Extract and format search results\n",
 " results = []\n",
-" if response and 'hits' in response:\n",
-" for hit in response['hits']['hits']:\n",
-" source = hit['_source']\n",
-" results.append({\n",
-" \"id\": source.get('id', hit['_id']),\n",
-" \"title\": source.get('title', 'Unknown'),\n",
-" \"url\": source.get('url', '')\n",
-" })\n",
+" if response and \"hits\" in response:\n",
+" for hit in response[\"hits\"][\"hits\"]:\n",
+" source = hit[\"_source\"]\n",
+" results.append(\n",
+" {\n",
+" \"id\": source.get(\"id\", hit[\"_id\"]),\n",
+" \"title\": source.get(\"title\", \"Unknown\"),\n",
+" \"url\": source.get(\"url\", \"\"),\n",
+" }\n",
+" )\n",
 "\n",
 " logger.info(f\"Found {len(results)} results\")\n",
 " return {\"results\": results}\n",
@@ -821,37 +828,29 @@
 " try:\n",
 " # Query by ID to get full document\n",
 " response = es_client.search(\n",
-" index=INDEX_NAME,\n",
-" body={\n",
-" \"query\": {\n",
-" \"term\": {\n",
-" \"id\": id\n",
-" }\n",
-" },\n",
-" \"size\": 1\n",
-" }\n",
+" index=INDEX_NAME, body={\"query\": {\"term\": {\"id\": id}}, \"size\": 1}\n",
 " )\n",
 "\n",
-" if not response or not response['hits']['hits']:\n",
+" if not response or not response[\"hits\"][\"hits\"]:\n",
 " raise ValueError(f\"Document with id '{id}' not found\")\n",
 "\n",
-" hit = response['hits']['hits'][0]\n",
-" source = hit['_source']\n",
+" hit = response[\"hits\"][\"hits\"][0]\n",
+" source = hit[\"_source\"]\n",
 "\n",
 " # Return all document fields\n",
 " result = {\n",
-" \"id\": source.get('id', id),\n",
-" \"title\": source.get('title', 'Unknown'),\n",
-" \"text\": source.get('text', ''),\n",
-" \"url\": source.get('url', ''),\n",
-" \"type\": source.get('type', ''),\n",
-" \"status\": source.get('status', ''),\n",
-" \"priority\": source.get('priority', ''),\n",
-" \"assignee\": source.get('assignee', ''),\n",
-" \"created_date\": source.get('created_date', ''),\n",
-" \"resolved_date\": source.get('resolved_date', ''),\n",
-" \"labels\": source.get('labels', ''),\n",
-" \"related_pr\": source.get('related_pr', '')\n",
+" \"id\": source.get(\"id\", id),\n",
+" \"title\": source.get(\"title\", \"Unknown\"),\n",
+" \"text\": source.get(\"text\", \"\"),\n",
+" \"url\": source.get(\"url\", \"\"),\n",
+" \"type\": source.get(\"type\", \"\"),\n",
+" \"status\": source.get(\"status\", \"\"),\n",
+" \"priority\": source.get(\"priority\", \"\"),\n",
+" \"assignee\": source.get(\"assignee\", \"\"),\n",
+" \"created_date\": source.get(\"created_date\", \"\"),\n",
+" \"resolved_date\": source.get(\"resolved_date\", \"\"),\n",
+" \"labels\": source.get(\"labels\", \"\"),\n",
+" \"related_pr\": source.get(\"related_pr\", \"\"),\n",
 " }\n",
 "\n",
 " logger.info(f\"Fetched: {result['title']}\")\n",
@@ -863,6 +862,7 @@
 "\n",
 " return mcp\n",
 "\n",
+"\n",
 "print(\"MCP server defined successfully\")"
 ]
 },
@@ -892,16 +892,11 @@
 "ngrok.set_auth_token(NGROK_TOKEN)\n",
 "\n",
 "pyngrok_config = PyngrokConfig(region=\"us\")\n",
-"public_url = ngrok.connect(\n",
-" 8000,\n",
-" \"http\",\n",
-" pyngrok_config=pyngrok_config,\n",
-" bind_tls=True\n",
-")\n",
+"public_url = ngrok.connect(8000, \"http\", pyngrok_config=pyngrok_config, bind_tls=True)\n",
 "\n",
-"print(\"=\"*70)\n",
+"print(\"=\" * 70)\n",
 "print(\"MCP SERVER IS READY!\")\n",
-"print(\"=\"*70)\n",
+"print(\"=\" * 70)\n",
 "print(f\"\\nPublic URL (use in ChatGPT): {public_url}/sse\")\n",
 "print(\"\\nIMPORTANT: Copy the URL above (including /sse at the end)\")\n",
 "print(\"\\nTo connect in ChatGPT:\")\n",
@@ -910,7 +905,7 @@
 "print(\"3. Paste the URL above\")\n",
 "print(\"4. Save and start using!\")\n",
 "print(\"\\nKeep this notebook running while using the connector\")\n",
-"print(\"=\"*70)"
+"print(\"=\" * 70)"
 ]
 },
 {
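
A caveat on the collapsed ngrok.connect call: in pyngrok v5 and later, connect() returns an NgrokTunnel object rather than a plain string, so interpolating public_url into the f-string prints the tunnel's repr around the URL. If that matters, a sketch of the more explicit form:

    tunnel = ngrok.connect(8000, "http", pyngrok_config=pyngrok_config, bind_tls=True)
    public_url = tunnel.public_url  # plain "https://..." string
    print(f"\nPublic URL (use in ChatGPT): {public_url}/sse")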
@@ -1089,9 +1084,11 @@
 "print(\"Server is running. To stop: Runtime > Interrupt execution\")\n",
 "print()\n",
 "\n",
+"\n",
 "def run_server():\n",
 " server.run(transport=\"sse\", host=\"0.0.0.0\", port=8000)\n",
 "\n",
+"\n",
 "server_thread = threading.Thread(target=run_server, daemon=True)\n",
 "server_thread.start()\n",
 "\n",
@@ -1143,8 +1140,10 @@
 "outputs": [],
 "source": [
 "try:\n",
-" result = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
-" if result.get('acknowledged', False):\n",
+" result = es_client.options(ignore_status=[400, 404]).indices.delete(\n",
+" index=INDEX_NAME\n",
+" )\n",
+" if result.get(\"acknowledged\", False):\n",
 " print(f\"Index '{INDEX_NAME}' deleted successfully\")\n",
 " else:\n",
 " print(f\"Error deleting index: {result}\")\n",
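
On the reformatted delete call: .options(ignore_status=[400, 404]) is the elasticsearch-py 8.x replacement for the older ignore= request parameter; it returns a copy of the client whose requests treat those HTTP statuses as non-errors. A compact sketch of the same cleanup:

    # 400/404 responses (e.g. index already gone) are returned, not raised
    resp = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)
    if resp.get("acknowledged", False):
        print(f"Deleted index '{INDEX_NAME}'")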