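"""generator.py

Text-generation helpers for Podcastify: building a single-speaker podcast
script at a given CEFR level, defining words in context, and holding a short
Q&A conversation about a generated podcast, all via the OpenAI chat
completions API. Settings are read from environment variables via a .env file.
"""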
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load configuration from a .env file. MIN_WORDS, MAX_GENERATED_TOKENS and
# TEMPERATURE must be set; the remaining settings fall back to defaults.
load_dotenv()
MIN_WORDS = int(os.getenv("MIN_WORDS"))
MAX_GENERATED_TOKENS = int(os.getenv("MAX_GENERATED_TOKENS"))
MAX_WORDS_QA = int(os.getenv("MAX_WORDS_QA", "20"))
DEFAULT_LENGTH_MIN = int(os.getenv("DEFAULT_LENGTH_MIN", "1"))
MODEL = os.getenv("MODEL")  # loaded but not used below; the API calls hardcode "gpt-4o-mini"
TEMPERATURE = float(os.getenv("TEMPERATURE"))  # loaded but not used below
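
# Illustrative .env sketch only (the values below are assumptions, not
# defaults shipped with the project); the OpenAI client also expects
# OPENAI_API_KEY to be set:
#
#   OPENAI_API_KEY=sk-...
#   MIN_WORDS=40
#   MAX_GENERATED_TOKENS=1000
#   MAX_WORDS_QA=20
#   DEFAULT_LENGTH_MIN=1
#   MODEL=gpt-4o-mini
#   TEMPERATURE=0.7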

proficiency_descriptors = {
    "C1": "Understands a wide range of material, including non-standard usage, with attention to finer details and implicit attitudes.",
    "B2": "Understands standard language in social, professional, or academic contexts, identifying viewpoints, attitudes, and mood.",
    "B1": "Understands main points of familiar topics and narratives delivered clearly and slowly in standard language.",
    "A2": "Understands essential information in everyday matters and simple stories if delivered clearly and slowly.",
    "A1": "Can pick out concrete information (e.g. places and times) from short recordings on familiar everyday topics, provided they are delivered very slowly and clearly."
}


def describe_level(level):
    """Return the CEFR descriptor for a level, or an empty string if unknown."""
    return proficiency_descriptors.get(level, "")


def chunk(text):
    """Split text into chunks of at least MIN_WORDS words, breaking after sentence-ending punctuation."""
    output = []
    current = ""
    for word in text.split():
        current += word + " "
        if len(current.split()) >= MIN_WORDS and ("!" in word or "." in word or "?" in word):
            output.append(current)
            current = ""
    if current.strip():
        # Keep any trailing words that never reached MIN_WORDS or a sentence end.
        output.append(current)
    return output
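
# For example (illustrative, assuming MIN_WORDS=5):
#   chunk("One two three four five. Six seven eight nine ten.")
#   -> ["One two three four five. ", "Six seven eight nine ten. "]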


def generate(language, language_level, topic, history=[], new_words=[], length=DEFAULT_LENGTH_MIN):
    """Generate a single-speaker podcast script and return it as a list of text chunks."""
    if length is None:
        length = DEFAULT_LENGTH_MIN
    prompt = f"Generate a podcast with one speaker in '{language}' about '{topic}' using language on CEFR level '{language_level}' (defined as \"{describe_level(language_level)}\"). It should be roughly {length} minutes long. Just return the text of the speaker. Do not include a title."
    client = OpenAI()
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    if history:
        # Feed the previous script back in and ask the model to continue it.
        messages.append({"role": "assistant", "content": " ".join(history)})
        messages.append({"role": "user", "content": "Continue the podcast (without talking about continuing it)."})
    if new_words:
        messages[-1]["content"] += " Try to use these words in the text generation: " + ", ".join(new_words)
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        max_completion_tokens=MAX_GENERATED_TOKENS)
    text = completion.choices[0].message.content
    return chunk(text)


def define(word, context):
    """Ask the model for a short definition of `word` as used in `context`."""
    prompt = f"Describe the definition of '{word}' within 25 words, and explain what it means in the context of '{context}'. Just output the definition."
    client = OpenAI()
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    completion = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    return completion.choices[0].message.content


def q_and_a(language, language_level, topic, messages, history=[], new_words=[]):
    """Return the next (content, role) turn of a teacher-learner conversation about the podcast."""
    # `new_words` is accepted for symmetry with generate() but is currently unused.
    client = OpenAI()
    api_messages = [
        {"role": "system", "content": f"You are a language teacher that wants to have a conversation about a podcast on the topic of '{topic}' in '{language}' with a learner on CEFR level {language_level}. This was the podcast: {' '.join(history)}. Just return the next message text in the conversation. Use vocabulary and grammar from the podcast. Be brief: Maximum {MAX_WORDS_QA} words per response."},
    ]
    # Replay the conversation so far before asking for the next reply.
    for message in messages:
        api_messages.append({"role": message["role"], "content": message["content"]})
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=api_messages,
        max_completion_tokens=MAX_GENERATED_TOKENS)
    return completion.choices[0].message.content, completion.choices[0].message.role
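

# Minimal manual smoke test, not part of the original module. It assumes the
# environment variables above (plus OPENAI_API_KEY) are configured; the topic,
# language, and level are arbitrary example values.
if __name__ == "__main__":
    segments = generate("English", "B1", "city gardening", length=1)
    print("\n".join(segments))
    print(define("compost", "Compost keeps the soil in the raised beds healthy."))
    reply, role = q_and_a(
        "English",
        "B1",
        "city gardening",
        messages=[{"role": "user", "content": "What did the podcast say about balconies?"}],
        history=segments,
    )
    print(f"{role}: {reply}")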