Mbonea committed on
Commit
32989ce
1 Parent(s): 50d8f5b

tts and doc update

Browse files
App/Embedding/EmbeddingRoutes.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import APIRouter
2
 
3
  from .utils.Initialize import TextSearch, IdSearch
4
  from .Schemas import SearchRequest, AddDocumentRequest
@@ -13,8 +13,11 @@ async def create_embeddings(req: AddDocumentRequest):
13
 
14
 
15
  @embeddigs_router.post("/search_id")
16
- async def search_id(req: SearchRequest):
17
- return IdSearch(query=req.query)
 
 
 
18
 
19
 
20
  @embeddigs_router.post("/search_text")
 
1
+ from fastapi import APIRouter, BackgroundTasks
2
 
3
  from .utils.Initialize import TextSearch, IdSearch
4
  from .Schemas import SearchRequest, AddDocumentRequest
 
13
 
14
 
15
  @embeddigs_router.post("/search_id")
16
async def search_id(
    req: SearchRequest,
    background_tasks: BackgroundTasks,
):
    """Return titles similar to the one identified by ``req.query``.

    ``IdSearch`` is synchronous (it performs a ``requests`` call and a vector
    search), so it is executed in the worker threadpool instead of directly
    on the event loop, where it would stall every other request.

    Args:
        req: Request body; ``req.query`` is forwarded as the id to look up.
        background_tasks: FastAPI task queue handed to ``IdSearch`` so it can
            schedule work to run after the response is sent.

    Returns:
        Whatever ``IdSearch`` returns.
    """
    # Local import: keeps this handler self-contained within the module.
    from fastapi.concurrency import run_in_threadpool

    return await run_in_threadpool(
        IdSearch, query=req.query, background_task=background_tasks
    )
21
 
22
 
23
  @embeddigs_router.post("/search_text")
App/Embedding/utils/Initialize.py CHANGED
@@ -1,52 +1,73 @@
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
  from langchain.vectorstores import Pinecone
4
- import os,requests
5
- import pinecone,pprint
 
6
  from .Elastic import FetchDocuments
7
 
8
 
9
- index_name = 'movie-recommender-fast'
10
  model_name = "thenlper/gte-base"
11
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
12
 
13
- TMDB_API=os.environ.get('TMDB_API')
14
 
15
  # get api key from app.pinecone.io
16
- PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
17
  # find your environment next to the api key in pinecone console
18
- PINECONE_ENV = os.environ.get('PINECONE_ENVIRONMENT')
 
 
 
19
 
20
- pinecone.init(
21
- api_key=PINECONE_API_KEY,
22
- environment=PINECONE_ENV
23
- )
24
 
25
  docsearch = Pinecone.from_existing_index(index_name, embeddings)
26
 
27
- def generate_text(doc):
28
- if doc['tv_results']:
29
- return pprint.pformat(doc['tv_results'][0]),doc['tv_results'][0]
30
- return pprint.pformat(doc['movie_results'][0]),doc['movie_results'][0]
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
- def IdSearch(query:str):
35
- doc=requests.get(f'https://api.themoviedb.org/3/find/{query}?external_source=imdb_id&language=en&api_key={TMDB_API}').json()
 
 
36
  try:
37
- text,props=generate_text(doc)
38
  except Exception as e:
39
  print(e)
40
  return []
41
- return TextSearch(text,filter={"key": {"$ne":query}})
42
-
43
 
44
 
45
- def TextSearch(query: str,filter=None):
46
- docs = docsearch.similarity_search(query,k=10,filter=filter)
47
- keys= [ doc.metadata['key'] for doc in docs ]
48
  return FetchDocuments(keys)
49
-
50
-
51
-
52
-
 
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
  from langchain.vectorstores import Pinecone
4
+ from fastapi import BackgroundTasks
5
+ import os, requests
6
+ import pinecone, pprint
7
  from .Elastic import FetchDocuments
8
 
9
 
10
+ index_name = "movie-recommender-fast"
11
  model_name = "thenlper/gte-base"
12
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
13
 
14
+ TMDB_API = os.environ.get("TMDB_API")
15
 
16
  # get api key from app.pinecone.io
17
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
18
  # find your environment next to the api key in pinecone console
19
+ PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
20
+
21
+ pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
22
+ vector_index = pinecone.Index(index_name=index_name)
23
 
 
 
 
 
24
 
25
  docsearch = Pinecone.from_existing_index(index_name, embeddings)
26
 
 
 
 
 
27
 
28
def check_if_exists(imdb_id):
    """Report whether a vector whose ``key`` metadata equals ``imdb_id`` exists.

    Args:
        imdb_id: Value compared against the ``"key"`` metadata field.

    Returns:
        ``True`` if the filtered query yields at least one match, else ``False``.
    """
    results = vector_index.query(
        # NOTE(review): a query vector is supplied because Pinecone queries
        # are expected to carry a vector or an id; embedding the id simply
        # yields a vector of the index's dimension. The metadata filter is
        # what actually decides the outcome - confirm against the client
        # version in use.
        vector=embeddings.embed_query(imdb_id),
        filter={"key": {"$eq": imdb_id}},
        top_k=1,
    )
    # Test the list of matches, not the response object itself: the response
    # wrapper is not a reliable emptiness signal.
    return bool(results.matches)
34
+
35
+
36
def add_document(imdb_id, doc):
    """Add a title to the vector store unless it is already indexed.

    Args:
        imdb_id: Identifier stored under the ``"key"`` metadata field and
            used for the existence check.
        doc: ``(text, props)`` pair; ``text`` becomes the document's page
            content and ``props`` (a dict) its metadata.

    Returns:
        None. Progress is reported with ``print``.
    """
    if check_if_exists(imdb_id=imdb_id):
        print("document exists")
        return

    text, props = doc
    # Build a new dict so the caller's ``props`` is not modified.
    # NOTE(review): metadata values are passed through unchanged; verify the
    # vector store accepts every value type present in ``props``.
    metadata = {**props, "key": imdb_id}
    docsearch.add_documents([Document(page_content=text, metadata=metadata)])
    # Report success only once the write has actually completed.
    print("document added")
49
+
50
+
51
def generate_text(doc):
    """Pick the first TV or movie hit from a lookup response.

    TV results take precedence over movie results. A missing result list is
    treated the same as an empty one.

    Args:
        doc: Mapping that may contain ``"tv_results"`` and/or
            ``"movie_results"`` lists.

    Returns:
        A ``(text, props)`` tuple where ``props`` is the first hit and
        ``text`` is its ``pprint.pformat`` rendering.

    Raises:
        IndexError: If neither list contains an entry.
    """
    for bucket in ("tv_results", "movie_results"):
        hits = doc.get(bucket)
        if hits:
            first = hits[0]
            return pprint.pformat(first), first
    raise IndexError("response contains no tv_results or movie_results entries")
55
 
56
 
57
def IdSearch(query: str, background_task: BackgroundTasks):
    """Find titles similar to the one identified by ``query``.

    Looks the id up through the TMDB ``find`` endpoint, renders the first hit
    to text with ``generate_text``, schedules ``add_document`` for that hit as
    a background task, and returns the ``TextSearch`` results for the text
    while excluding entries whose ``key`` equals ``query``.

    Args:
        query: External id sent to TMDB with ``external_source=imdb_id``.
        background_task: Task queue used to schedule ``add_document``.

    Returns:
        The ``TextSearch`` result, or ``[]`` if the lookup fails or yields
        no usable hit.
    """
    # Local import: only needed to escape the id before it enters the URL path.
    from urllib.parse import quote

    try:
        response = requests.get(
            # Escape the caller-supplied id so it cannot alter the URL path.
            f"https://api.themoviedb.org/3/find/{quote(query, safe='')}",
            params={
                "external_source": "imdb_id",
                "language": "en",
                "api_key": TMDB_API,
            },
            # Bound the wait so a stalled upstream cannot hang the request.
            timeout=10,
        )
        text, props = generate_text(response.json())
    except Exception as e:
        # Best-effort lookup: network errors, non-JSON bodies and empty
        # results all degrade to "no recommendations".
        print(e)
        return []

    background_task.add_task(add_document, imdb_id=query, doc=(text, props))
    return TextSearch(text, filter={"key": {"$ne": query}})
68
 
69
 
70
def TextSearch(query: str, filter=None):
    """Return the stored documents most similar to ``query``.

    Runs a top-10 similarity search on ``docsearch`` (optionally restricted by
    the metadata ``filter``), collects the ``"key"`` metadata value of every
    hit and passes those keys to ``FetchDocuments``.

    Args:
        query: Text to search for.
        filter: Optional metadata filter forwarded to the similarity search.

    Returns:
        The result of ``FetchDocuments`` for the matched keys.
    """
    matches = docsearch.similarity_search(query, k=10, filter=filter)
    return FetchDocuments([match.metadata["key"] for match in matches])
 
 
 
 
App/TTS/Schemas.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel,Field
2
+ from typing import List,Optional
3
+ import uuid
4
+
5
class Speak(BaseModel):
    """One paragraph of a text-to-speech request.

    ``text`` is wrapped in ``<speak>...</speak>`` on construction unless it
    already contains a ``<speak>`` tag.
    """

    # Generated per instance when the caller does not supply one.
    paragraphId: str = Field(default_factory=lambda: str(uuid.uuid4()))
    speaker: str
    text: str
    # NOTE(review): the alias makes this field read the same input key as
    # ``speaker`` - presumably so both carry the same voice id; confirm.
    voiceId: str = Field(default="c60166365edf46589657770d", alias="speaker")  # Default speaker value

    def __init__(self, **data):
        text = data.get("text")
        # Only wrap real strings. A missing or non-string ``text`` is passed
        # through untouched so model validation reports it, instead of the
        # membership test failing with a TypeError.
        if isinstance(text, str) and "<speak>" not in text:
            data["text"] = f"<speak>{text}</speak>"
        super().__init__(**data)
14
+
15
+
16
+
17
def _new_uuid() -> str:
    """Return a freshly generated UUID4 rendered as a string."""
    return str(uuid.uuid4())


class TTSGenerateRequest(BaseModel):
    """Body of a text-to-speech generation request.

    ``requestId`` and ``workspaceId`` each default to a new random UUID
    string when the caller omits them.
    """

    paragraphs: List[Speak]
    requestId: str = Field(default_factory=_new_uuid)
    workspaceId: str = Field(default_factory=_new_uuid)
21
+
22
+
23
class StatusRequest(BaseModel):
    """Body of a status lookup: the id of a previously submitted request."""

    requestId: str
25
+
26
+
27
class GetTranscriptions(BaseModel):
    """Request body carrying a single integer ``userId``."""

    userId: int
App/TTS/TTSRoutes.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
+
4
+ from .Schemas import StatusRequest, TTSGenerateRequest
5
+ from .utils.Podcastle import PodcastleAPI
6
+ import os
7
+
8
+ tts_router = APIRouter(tags=["TTS"])
9
+ data = {"username": os.environ.get("USERNAME"), "password": os.environ.get("PASSWORD")}
10
+ tts = PodcastleAPI(**data)
11
+
12
+
13
+ #
14
@tts_router.post("/generate_tts")
async def generate_voice(req: TTSGenerateRequest):
    """Submit a text-to-speech job and return the upstream JSON response.

    Args:
        req: Paragraphs plus request/workspace ids to forward.

    Returns:
        The value returned by ``tts.make_request``.
    """
    return await tts.make_request(req)
18
+
19
+
20
@tts_router.post("/status")
async def search_id(req: StatusRequest):
    """Look up the state of a text-to-speech job.

    Args:
        req: Carries the ``requestId`` of the job to query.

    Returns:
        The value returned by ``tts.check_status``.
    """
    status = await tts.check_status(req)
    return status
23
+
24
+
25
+ # @tts_router.post("/search_text")
26
+ # async def search_text(req: SearchRequest):
27
+ # return TextSearch(query=req.query)
App/TTS/utils/Podcastle.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp
2
+ import asyncio
3
+ from App.TTS.Schemas import TTSGenerateRequest,StatusRequest
4
+ from pydantic import BaseModel
5
+
6
class PodcastleAPI:
    """Small asynchronous client for the Podcastle text-to-speech endpoints.

    The HTTP session is created lazily and an access token is obtained on
    first use. A request answered with HTTP 401 triggers one fresh sign-in
    followed by a single retry.

    Can be used as an async context manager, which closes the session on
    exit.
    """

    def __init__(self, username, password):
        """Store credentials and default headers; no network I/O happens here."""
        self.base_url = "https://podcastle.ai/api"
        self.username = username
        self.password = password
        self.headers = {
            "authority": "podcastle.ai",
            "accept": "*/*",
            "accept-language": "en-US,en;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/json",
            # Add your other headers here
        }
        self.session = None  # created on demand by create_session()
        self.access_token = None  # set by signin()

    async def create_session(self):
        """Create a new client session carrying the default headers."""
        self.session = aiohttp.ClientSession(headers=self.headers)

    async def close_session(self):
        """Close the session, if any, and forget it so a later call can reopen one."""
        if self.session:
            await self.session.close()
            self.session = None

    async def _ensure_session(self):
        """Make sure ``self.session`` refers to an open session."""
        if self.session is None or self.session.closed:
            await self.create_session()

    async def signin(self):
        """Sign in with the stored credentials and remember the access token.

        Returns:
            The decoded JSON body of the sign-in response.

        Raises:
            KeyError: If the response does not contain ``auth.accessToken``.
        """
        url = f"{self.base_url}/auth/signin"
        payload = {
            "username": self.username,
            "password": self.password,
        }

        await self._ensure_session()

        async with self.session.post(url, json=payload) as response:
            response_data = await response.json()

        try:
            self.access_token = response_data["auth"]["accessToken"]
        except (KeyError, TypeError) as err:
            # Drop any stale token and say what actually went wrong.
            self.access_token = None
            raise KeyError(
                f"sign-in response (HTTP {response.status}) has no auth.accessToken"
            ) from err
        return response_data

    async def _request_json(self, method, url, **kwargs):
        """Send an authorized request and return its decoded JSON body.

        On a 401 answer to the first attempt the client signs in again and
        retries once; the second answer is returned whatever its status.
        """
        await self._ensure_session()

        if not self.access_token:
            await self.signin()

        for attempt in (1, 2):
            headers = {**self.headers, "authorization": f"Bearer {self.access_token}"}
            async with self.session.request(
                method, url, headers=headers, **kwargs
            ) as response:
                if response.status != 401 or attempt == 2:
                    return await response.json()
            # First attempt was rejected: refresh the access token and retry.
            await self.signin()

    async def make_request(self, tts_request: TTSGenerateRequest):
        """POST ``tts_request`` to the text-to-speech endpoint.

        Returns:
            The decoded JSON response.
        """
        url = f"{self.base_url}/speech/text-to-speech"
        return await self._request_json("POST", url, json=tts_request.dict())

    async def check_status(self, tts_status: StatusRequest):
        """GET the text-to-speech resource for ``tts_status.requestId``.

        Returns:
            The decoded JSON response.
        """
        url = f"{self.base_url}/speech/text-to-speech/{tts_status.requestId}"
        return await self._request_json("GET", url)

    async def __aenter__(self):
        await self._ensure_session()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.close_session()
104
+
105
+ # Example usage:
106
if __name__ == "__main__":
    import os
    from typing import List

    # Stand-alone request models for this demo only.
    class Speak(BaseModel):
        paragraphId: str
        text: str
        speaker: str

    class TTSGenerateRequest(BaseModel):
        paragraphs: List[Speak]
        requestId: str
        workspaceId: str

    async def main():
        """Send one sample text-to-speech request and print the response."""
        # Credentials come from the environment; never commit them to source
        # control.
        username = os.environ.get("USERNAME")
        password = os.environ.get("PASSWORD")
        if not username or not password:
            raise SystemExit("Set the USERNAME and PASSWORD environment variables.")

        # Create a TTSGenerateRequest object
        tts_request = TTSGenerateRequest(
            paragraphs=[
                Speak(
                    paragraphId="6f05p",
                    text="<speak>Hey Daniel. Are you ok?. Manchester United almost lost yesterday </speak>",
                    speaker="c60166365edf46589657770d",
                )
            ],
            requestId="7d6018ae-9617-4d22-879f-5e67283fa140",
            workspaceId="f84fd58e-2899-4531-9f51-77c155c1e294",
        )

        async with PodcastleAPI(username, password) as podcastle_api:
            # Make the TTS request using the TTSGenerateRequest object
            response_text = await podcastle_api.make_request(tts_request)
            print(response_text)

    # asyncio.run creates and closes the event loop itself.
    asyncio.run(main())
App/TTS/utils/__init__.py ADDED
File without changes
App/app.py CHANGED
@@ -1,7 +1,7 @@
1
  from fastapi import FastAPI
2
 
3
  from fastapi.middleware.gzip import GZipMiddleware
4
-
5
 
6
  from .Embedding.EmbeddingRoutes import embeddigs_router
7
 
@@ -39,3 +39,4 @@ async def landing_page():
39
 
40
 
41
  app.include_router(embeddigs_router)
 
 
1
  from fastapi import FastAPI
2
 
3
  from fastapi.middleware.gzip import GZipMiddleware
4
+ from .TTS.TTSRoutes import tts_router
5
 
6
  from .Embedding.EmbeddingRoutes import embeddigs_router
7
 
 
39
 
40
 
41
  app.include_router(embeddigs_router)
42
+ app.include_router(tts_router)